Commit 7fb1d9fc, authored by Rohit Seth, committed by Linus Torvalds
Browse files

[PATCH] mm: __alloc_pages cleanup



Clean up of __alloc_pages.

Restoration of previous behaviour, plus further cleanups by introducing an
'alloc_flags', removing the last of should_reclaim_zone.

Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 51c6f666
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
void build_all_zonelists(void);
void wakeup_kswapd(struct zone *zone, int order);
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		int alloc_type, int can_try_harder, gfp_t gfp_high);
		int classzone_idx, int alloc_flags);

#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
+85 −110
Original line number Diff line number Diff line
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
		}
		local_irq_restore(flags);
		put_cpu();
	}

	if (page == NULL) {
	} else {
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order);
		spin_unlock_irqrestore(&zone->lock, flags);
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
	return page;
}

/*
 * Bit flags combined into the 'alloc_flags' argument of
 * get_page_from_freelist() and zone_watermark_ok().
 */
#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
#define ALLOC_HARDER		0x02 /* try to alloc harder */
#define ALLOC_HIGH		0x04 /* __GFP_HIGH set */
#define ALLOC_CPUSET		0x08 /* check for correct cpuset */

/*
 * Return 1 if free pages are above 'mark'. This takes into account the order
 * of the allocation.
 */
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		      int classzone_idx, int can_try_harder, gfp_t gfp_high)
		      int classzone_idx, int alloc_flags)
{
	/* free_pages may go negative - that's OK */
	long min = mark, free_pages = z->free_pages - (1 << order) + 1;
	int o;

	if (gfp_high)
	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (can_try_harder)
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;

	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
	return 1;
}

static inline int
should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist, int alloc_flags)
{
	if (!z->reclaim_pages)
		return 0;
	if (gfp_mask & __GFP_NORECLAIM)
		return 0;
	return 1;
	struct zone **z = zonelist->zones;
	struct page *page = NULL;
	int classzone_idx = zone_idx(*z);

	/*
	 * Go through the zonelist once, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	do {
		if ((alloc_flags & ALLOC_CPUSET) &&
				!cpuset_zone_allowed(*z, gfp_mask))
			continue;

		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			if (!zone_watermark_ok(*z, order, (*z)->pages_low,
				    classzone_idx, alloc_flags))
				continue;
		}

		page = buffered_rmqueue(*z, order, gfp_mask);
		if (page) {
			zone_statistics(zonelist, *z);
			break;
		}
	} while (*(++z) != NULL);
	return page;
}

/*
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
	const gfp_t wait = gfp_mask & __GFP_WAIT;
	struct zone **zones, *z;
	struct zone **z;
	struct page *page;
	struct reclaim_state reclaim_state;
	struct task_struct *p = current;
	int i;
	int classzone_idx;
	int do_retry;
	int can_try_harder;
	int alloc_flags;
	int did_some_progress;

	might_sleep_if(wait);

	/*
	 * The caller may dip into page reserves a bit more if the caller
	 * cannot run direct reclaim, or is the caller has realtime scheduling
	 * policy
	 */
	can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;

	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */

	if (unlikely(zones[0] == NULL)) {
	if (unlikely(*z == NULL)) {
		/* Should this ever happen?? */
		return NULL;
	}

	classzone_idx = zone_idx(zones[0]);

restart:
	/*
	 * Go through the zonelist once, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	for (i = 0; (z = zones[i]) != NULL; i++) {
		int do_reclaim = should_reclaim_zone(z, gfp_mask);
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
				zonelist, ALLOC_CPUSET);
	if (page)
		goto got_pg;

		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
			continue;
	do
		wakeup_kswapd(*z, order);
	while (*(++z));

	/*
		 * If the zone is to attempt early page reclaim then this loop
		 * will try to reclaim pages and check the watermark a second
		 * time before giving up and falling back to the next zone.
	 * OK, we're below the kswapd watermark and have kicked background
	 * reclaim. Now things get more complex, so set up alloc_flags according
	 * to how we want to proceed.
	 *
	 * The caller may dip into page reserves a bit more if the caller
	 * cannot run direct reclaim, or if the caller has realtime scheduling
	 * policy.
	 */
zone_reclaim_retry:
		if (!zone_watermark_ok(z, order, z->pages_low,
				       classzone_idx, 0, 0)) {
			if (!do_reclaim)
				continue;
			else {
				zone_reclaim(z, gfp_mask, order);
				/* Only try reclaim once */
				do_reclaim = 0;
				goto zone_reclaim_retry;
			}
		}

		page = buffered_rmqueue(z, order, gfp_mask);
		if (page)
			goto got_pg;
	}

	for (i = 0; (z = zones[i]) != NULL; i++)
		wakeup_kswapd(z, order);
	alloc_flags = 0;
	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
		alloc_flags |= ALLOC_HARDER;
	if (gfp_mask & __GFP_HIGH)
		alloc_flags |= ALLOC_HIGH;
	if (wait)
		alloc_flags |= ALLOC_CPUSET;

	/*
	 * Go through the zonelist again. Let __GFP_HIGH and allocations
	 * coming from realtime tasks to go deeper into reserves
	 * coming from realtime tasks go deeper into reserves.
	 *
	 * This is the last chance, in general, before the goto nopage.
	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	for (i = 0; (z = zones[i]) != NULL; i++) {
		if (!zone_watermark_ok(z, order, z->pages_min,
				       classzone_idx, can_try_harder,
				       gfp_mask & __GFP_HIGH))
			continue;

		if (wait && !cpuset_zone_allowed(z, gfp_mask))
			continue;

		page = buffered_rmqueue(z, order, gfp_mask);
	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
	if (page)
		goto got_pg;
	}

	/* This allocation should allow future memory freeing. */

@@ -897,13 +894,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc:
			/* go through the zonelist yet again, ignoring mins */
			for (i = 0; (z = zones[i]) != NULL; i++) {
				if (!cpuset_zone_allowed(z, gfp_mask))
					continue;
				page = buffered_rmqueue(z, order, gfp_mask);
			page = get_page_from_freelist(gfp_mask, order,
				zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
			if (page)
				goto got_pg;
			}
			if (gfp_mask & __GFP_NOFAIL) {
				blk_congestion_wait(WRITE, HZ/50);
				goto nofail_alloc;
@@ -924,7 +918,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
	reclaim_state.reclaimed_slab = 0;
	p->reclaim_state = &reclaim_state;

	did_some_progress = try_to_free_pages(zones, gfp_mask);
	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);

	p->reclaim_state = NULL;
	p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
	cond_resched();

	if (likely(did_some_progress)) {
		for (i = 0; (z = zones[i]) != NULL; i++) {
			if (!zone_watermark_ok(z, order, z->pages_min,
					       classzone_idx, can_try_harder,
					       gfp_mask & __GFP_HIGH))
				continue;

			if (!cpuset_zone_allowed(z, gfp_mask))
				continue;

			page = buffered_rmqueue(z, order, gfp_mask);
		page = get_page_from_freelist(gfp_mask, order,
						zonelist, alloc_flags);
		if (page)
			goto got_pg;
		}
	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
		/*
		 * Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
		 * a parallel oom killing, we must fail if we're still
		 * under heavy pressure.
		 */
		for (i = 0; (z = zones[i]) != NULL; i++) {
			if (!zone_watermark_ok(z, order, z->pages_high,
					       classzone_idx, 0, 0))
				continue;

			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
				continue;

			page = buffered_rmqueue(z, order, gfp_mask);
		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
						zonelist, ALLOC_CPUSET);
		if (page)
			goto got_pg;
		}

		out_of_memory(gfp_mask, order);
		goto restart;
@@ -996,9 +973,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
		dump_stack();
		show_mem();
	}
	return NULL;
got_pg:
	zone_statistics(zonelist, z);
	return page;
}

+3 −3
Original line number Diff line number Diff line
@@ -1074,7 +1074,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
					continue;

				if (!zone_watermark_ok(zone, order,
						zone->pages_high, 0, 0, 0)) {
						zone->pages_high, 0, 0)) {
					end_zone = i;
					goto scan;
				}
@@ -1111,7 +1111,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)

			if (nr_pages == 0) {	/* Not software suspend */
				if (!zone_watermark_ok(zone, order,
						zone->pages_high, end_zone, 0, 0))
						zone->pages_high, end_zone, 0))
					all_zones_ok = 0;
			}
			zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
		return;

	pgdat = zone->zone_pgdat;
	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
		return;
	if (pgdat->kswapd_max_order < order)
		pgdat->kswapd_max_order = order;