Commit ddc1a5cb authored by Hugh Dickins's avatar Hugh Dickins Committed by Andrew Morton
Browse files

mempolicy: alloc_pages_mpol() for NUMA policy without vma

Shrink shmem's stack usage by eliminating the pseudo-vma from its folio
allocation.  alloc_pages_mpol(gfp, order, pol, ilx, nid) becomes the
principal actor for passing mempolicy choice down to __alloc_pages(),
rather than vma_alloc_folio(gfp, order, vma, addr, hugepage).

vma_alloc_folio() and alloc_pages() remain, but as wrappers around
alloc_pages_mpol().  alloc_pages_bulk_*() untouched, except to provide the
additional args to policy_nodemask(), which subsumes policy_node(). 
Cleanup throughout, cutting out some unhelpful "helpers".

It would all be much simpler without MPOL_INTERLEAVE, but that adds a
dynamic to the constant mpol: complicated by v3.6 commit 09c231cb
("tmpfs: distribute interleave better across nodes"), which added ino bias
to the interleave, hidden from mm/mempolicy.c until this commit.

Hence "ilx" throughout, the "interleave index".  Originally I thought it
could be done just with nid, but that's wrong: the nodemask may come from
the shared policy layer below a shmem vma, or it may come from the task
layer above a shmem vma; and without the final nodemask then nodeid cannot
be decided.  And how ilx is applied depends also on page order.

The interleave index is almost always irrelevant unless MPOL_INTERLEAVE:
with one exception in alloc_pages_mpol(), where the NO_INTERLEAVE_INDEX
passed down from vma-less alloc_pages() is also used as hint not to use
THP-style hugepage allocation - to avoid the overhead of a hugepage arg
(though I don't understand why we never just added a GFP bit for THP - if
it actually needs a different allocation strategy from other pages of the
same order).  vma_alloc_folio() still carries its hugepage arg here, but
it is not used, and should be removed when agreed.

get_vma_policy() no longer allows a NULL vma: over time I believe we've
eradicated all the places which used to need it e.g.  swapoff and madvise
used to pass NULL vma to read_swap_cache_async(), but now know the vma.

[hughd@google.com: handle NULL mpol being passed to __read_swap_cache_async()]
  Link: https://lkml.kernel.org/r/ea419956-4751-0102-21f7-9c93cb957892@google.com
Link: https://lkml.kernel.org/r/74e34633-6060-f5e3-aee-7040d43f2e93@google.com
Link: https://lkml.kernel.org/r/1738368e-bac0-fd11-ed7f-b87142a939fe@google.com


Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tejun heo <tj@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Domenico Cerasuolo <mimmocerasuolo@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 23e48832
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -2673,8 +2673,9 @@ static int show_numa_map(struct seq_file *m, void *v)
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mempolicy *pol;
	char buffer[64];
	struct mempolicy *pol;
	pgoff_t ilx;
	int nid;

	if (!mm)
@@ -2683,7 +2684,7 @@ static int show_numa_map(struct seq_file *m, void *v)
	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	pol = __get_vma_policy(vma, vma->vm_start, &ilx);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
+9 −1
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/topology.h>

struct vm_area_struct;
struct mempolicy;

/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -262,7 +263,9 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,

#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
struct folio *folio_alloc(gfp_t gfp, unsigned order);
struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *folio_alloc(gfp_t gfp, unsigned int order);
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
		unsigned long addr, bool hugepage);
#else
@@ -270,6 +273,11 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
		struct mempolicy *mpol, pgoff_t ilx, int nid)
{
	return alloc_pages(gfp, order);
}
static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
{
	return __folio_alloc_node(gfp, order, numa_node_id());
+17 −3
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@

struct mm_struct;

#define NO_INTERLEAVE_INDEX (-1UL)	/* use task il_prev for interleaving */

#ifdef CONFIG_NUMA

/*
@@ -126,7 +128,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,

struct mempolicy *get_task_policy(struct task_struct *p);
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
		unsigned long addr);
		unsigned long addr, pgoff_t *ilx);
struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
		unsigned long addr, int order, pgoff_t *ilx);
bool vma_policy_mof(struct vm_area_struct *vma);

extern void numa_default_policy(void);
@@ -140,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma,
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
				const nodemask_t *mask);
extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);

extern unsigned int mempolicy_slab_node(void);

extern enum zone_type policy_zone;
@@ -179,6 +181,11 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);

struct mempolicy {};

static inline struct mempolicy *get_task_policy(struct task_struct *p)
{
	return NULL;
}

static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	return true;
@@ -213,6 +220,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx)
	return NULL;
}

static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
				unsigned long addr, int order, pgoff_t *ilx)
{
	*ilx = 0;
	return NULL;
}

static inline int
vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
+1 −1
Original line number Diff line number Diff line
@@ -619,7 +619,7 @@ struct vm_operations_struct {
	 * policy.
	 */
	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
					unsigned long addr);
					unsigned long addr, pgoff_t *ilx);
#endif
	/*
	 * Called by vm_normal_page() for special PTEs to find the
+8 −13
Original line number Diff line number Diff line
@@ -562,30 +562,25 @@ static unsigned long shm_pagesize(struct vm_area_struct *vma)
}

#ifdef CONFIG_NUMA
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shm_file_data *sfd = shm_file_data(vma->vm_file);
	int err = 0;

	if (sfd->vm_ops->set_policy)
		err = sfd->vm_ops->set_policy(vma, new);
		err = sfd->vm_ops->set_policy(vma, mpol);
	return err;
}

static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
					unsigned long addr)
					unsigned long addr, pgoff_t *ilx)
{
	struct file *file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct mempolicy *pol = NULL;
	struct shm_file_data *sfd = shm_file_data(vma->vm_file);
	struct mempolicy *mpol = vma->vm_policy;

	if (sfd->vm_ops->get_policy)
		pol = sfd->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy)
		pol = vma->vm_policy;

	return pol;
		mpol = sfd->vm_ops->get_policy(vma, addr, ilx);
	return mpol;
}
#endif

Loading