Commit 1a8c64e1 authored by Aneesh Kumar K.V, committed by Andrew Morton
Browse files

mm/memory_hotplug: embed vmem_altmap details in memory block

With memmap on memory, some architectures need more details w.r.t. the
altmap, such as base_pfn, end_pfn, etc., to unmap vmemmap memory.  Instead
of computing them again when we remove a memory block, embed the
vmem_altmap details in struct memory_block if the memory block uses the
memmap on memory feature.

[yangyingliang@huawei.com: fix error return code in add_memory_resource()]
  Link: https://lkml.kernel.org/r/20230809081552.1351184-1-yangyingliang@huawei.com
Link: https://lkml.kernel.org/r/20230808091501.287660-7-aneesh.kumar@linux.ibm.com


Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 603fd64d
Loading
Loading
Loading
Loading
+17 −10
Original line number Diff line number Diff line
@@ -105,7 +105,8 @@ EXPORT_SYMBOL(unregister_memory_notifier);
/*
 * Free the struct memory_block that to_memory_block() resolves from @dev.
 *
 * The block's altmap pointer is expected to have been cleared before this
 * point; a non-NULL pointer only triggers a warning and the block is
 * freed regardless.
 */
static void memory_block_release(struct device *dev)
{
	struct memory_block *block = to_memory_block(dev);

	/* A still-set altmap pointer here means it was never released. */
	WARN_ON(block->altmap);

	kfree(block);
}

@@ -183,7 +184,7 @@ static int memory_block_online(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
	unsigned long nr_vmemmap_pages = 0;
	struct zone *zone;
	int ret;

@@ -200,6 +201,9 @@ static int memory_block_online(struct memory_block *mem)
	 * stage helps to keep accounting easier to follow - e.g vmemmaps
	 * belong to the same zone as the memory they backed.
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	if (nr_vmemmap_pages) {
		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
		if (ret)
@@ -230,7 +234,7 @@ static int memory_block_offline(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
	unsigned long nr_vmemmap_pages = 0;
	int ret;

	if (!mem->zone)
@@ -240,6 +244,9 @@ static int memory_block_offline(struct memory_block *mem)
	 * Unaccount before offlining, such that unpopulated zone and kthreads
	 * can properly be torn down in offline_pages().
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  -nr_vmemmap_pages);
@@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
#endif

static int add_memory_block(unsigned long block_id, unsigned long state,
			    unsigned long nr_vmemmap_pages,
			    struct vmem_altmap *altmap,
			    struct memory_group *group)
{
	struct memory_block *mem;
@@ -744,7 +751,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	mem->nid = NUMA_NO_NODE;
	mem->nr_vmemmap_pages = nr_vmemmap_pages;
	mem->altmap = altmap;
	INIT_LIST_HEAD(&mem->group_next);

#ifndef CONFIG_NUMA
@@ -783,14 +790,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr)
	if (section_count == 0)
		return 0;
	return add_memory_block(memory_block_id(base_section_nr),
				MEM_ONLINE, 0,  NULL);
				MEM_ONLINE, NULL,  NULL);
}

static int add_hotplug_memory_block(unsigned long block_id,
				    unsigned long nr_vmemmap_pages,
				    struct vmem_altmap *altmap,
				    struct memory_group *group)
{
	return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
}

static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +825,7 @@ static void remove_memory_block(struct memory_block *memory)
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size,
				unsigned long vmemmap_pages,
				struct vmem_altmap *altmap,
				struct memory_group *group)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -832,7 +839,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
		ret = add_hotplug_memory_block(block_id, altmap, group);
		if (ret)
			break;
	}
+2 −6
Original line number Diff line number Diff line
@@ -77,11 +77,7 @@ struct memory_block {
	 */
	struct zone *zone;
	struct device dev;
	/*
	 * Number of vmemmap pages. These pages
	 * lay at the beginning of the memory block.
	 */
	unsigned long nr_vmemmap_pages;
	struct vmem_altmap *altmap;
	struct memory_group *group;	/* group (if any) for this block */
	struct list_head group_next;	/* next block inside memory group */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -147,7 +143,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
				unsigned long vmemmap_pages,
				struct vmem_altmap *altmap,
				struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
+35 −21
Original line number Diff line number Diff line
@@ -1439,7 +1439,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
	if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
		if (mhp_supports_memmap_on_memory(size)) {
			mhp_altmap.free = memory_block_memmap_on_memory_pages();
			params.altmap = &mhp_altmap;
			params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
			if (!params.altmap) {
				ret = -ENOMEM;
				goto error;
			}

			memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap));
		}
		/* fallback to not using altmap  */
	}
@@ -1447,13 +1453,13 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size, &params);
	if (ret < 0)
		goto error;
		goto error_free;

	/* create memory block devices after memory was added */
	ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
	ret = create_memory_block_devices(start, size, params.altmap, group);
	if (ret) {
		arch_remove_memory(start, size, NULL);
		goto error;
		goto error_free;
	}

	if (new_node) {
@@ -1490,6 +1496,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
		walk_memory_blocks(start, size, NULL, online_memory_block);

	return ret;
error_free:
	kfree(params.altmap);
error:
	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
		memblock_remove(start, size);
@@ -2056,12 +2064,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
	return 0;
}

static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
static int test_has_altmap_cb(struct memory_block *mem, void *arg)
{
	struct memory_block **mem_ptr = (struct memory_block **)arg;
	/*
	 * If not set, continue with the next block.
	 * return the memblock if we have altmap
	 * and break callback.
	 */
	return mem->nr_vmemmap_pages;
	if (mem->altmap) {
		*mem_ptr = mem;
		return 1;
	}
	return 0;
}

static int check_cpu_on_node(int nid)
@@ -2136,10 +2150,9 @@ EXPORT_SYMBOL(try_offline_node);

static int __ref try_remove_memory(u64 start, u64 size)
{
	struct vmem_altmap mhp_altmap = {};
	struct vmem_altmap *altmap = NULL;
	unsigned long nr_vmemmap_pages;
	struct memory_block *mem;
	int rc = 0, nid = NUMA_NO_NODE;
	struct vmem_altmap *altmap = NULL;

	BUG_ON(check_hotplug_memory_range(start, size));

@@ -2161,25 +2174,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
	 * the same granularity it was added - a single memory block.
	 */
	if (mhp_memmap_on_memory()) {
		nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
						      get_nr_vmemmap_pages_cb);
		if (nr_vmemmap_pages) {
		rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
		if (rc) {
			if (size != memory_block_size_bytes()) {
				pr_warn("Refuse to remove %#llx - %#llx,"
					"wrong granularity\n",
					start, start + size);
				return -EINVAL;
			}

			altmap = mem->altmap;
			/*
			 * Let remove_pmd_table->free_hugepage_table do the
			 * right thing if we used vmem_altmap when hot-adding
			 * the range.
			 * Mark altmap NULL so that we can add a debug
			 * check on memblock free.
			 */
			mhp_altmap.base_pfn = PHYS_PFN(start);
			mhp_altmap.free = nr_vmemmap_pages;
			mhp_altmap.alloc = nr_vmemmap_pages;
			altmap = &mhp_altmap;
			mem->altmap = NULL;
		}
	}

@@ -2196,6 +2204,12 @@ static int __ref try_remove_memory(u64 start, u64 size)

	arch_remove_memory(start, size, altmap);

	/* Verify that all vmemmap pages have actually been freed. */
	if (altmap) {
		WARN(altmap->alloc, "Altmap not fully unmapped");
		kfree(altmap);
	}

	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
		memblock_phys_free(start, size);
		memblock_remove(start, size);