Commit debdce20 authored by Dave Jiang, committed by Dan Williams
Browse files

cxl/region: Deal with numa nodes not enumerated by SRAT



For NUMA nodes that are not created by SRAT, no memory_target is
allocated and the node is not managed by the HMAT_REPORTING code.
Therefore the hmat_callback() memory hotplug notifier will exit early on
those NUMA nodes. The CXL memory hotplug notifier needs to call
node_set_perf_attrs() directly in order to set up the access sysfs
attributes.

In acpi_numa_init(), the last proximity domain (pxm) id created by SRAT is
stored. Add a helper function acpi_node_backed_by_real_pxm() in order to
check if a NUMA node id is defined by SRAT or created by CFMWS.

The node_set_perf_attrs() symbol is exported to allow updating the perf
attributes of a node. The sysfs path
/sys/devices/system/node/nodeX/access0/initiators/* is created by
node_set_perf_attrs() for the various attributes, where nodeX is matched
to the NUMA node of the CXL region.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20240308220055.2172956-13-dave.jiang@intel.com


Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 067353a4
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
unsigned char acpi_srat_revision __initdata;
static int acpi_numa __initdata;

/*
 * Highest proximity domain id present in node_to_pxm_map[] at the point where
 * acpi_numa_init() is about to parse the CEDT CFMWS entries. Read by
 * acpi_node_backed_by_real_pxm().
 */
static int last_real_pxm;

void __init disable_srat(void)
{
	acpi_numa = -1;
@@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
		if (node_to_pxm_map[i] > fake_pxm)
			fake_pxm = node_to_pxm_map[i];
	}
	last_real_pxm = fake_pxm;
	fake_pxm++;
	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
			      &fake_pxm);
@@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
	return 0;
}

/**
 * acpi_node_backed_by_real_pxm - Test a NUMA node against the last real PXM
 * @nid: NUMA node id to look up
 *
 * Translates @nid to its proximity domain with node_to_pxm() and compares the
 * result against last_real_pxm, the value acpi_numa_init() records just
 * before it parses the CEDT CFMWS entries.
 *
 * Return: true if the proximity domain of @nid is less than or equal to
 * last_real_pxm, false otherwise.
 */
bool acpi_node_backed_by_real_pxm(int nid)
{
	return node_to_pxm(nid) <= last_real_pxm;
}
EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);

static int acpi_get_pxm(acpi_handle h)
{
	unsigned long long pxm;
+1 −0
Original line number Diff line number Diff line
@@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
		}
	}
}
EXPORT_SYMBOL_GPL(node_set_perf_attrs);

/**
 * struct node_cache_info - Internal tracking for memory node caches
+5 −0
Original line number Diff line number Diff line
@@ -586,3 +586,8 @@ int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
{
	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
}

/**
 * cxl_need_node_perf_attrs_update - Decide how a node's perf data is published
 * @nid: NUMA node id to check
 *
 * Thin inversion of acpi_node_backed_by_real_pxm(). The region code uses the
 * result to choose between calling node_set_perf_attrs() directly and going
 * through cxl_update_hmat_access_coordinates().
 *
 * Return: true when acpi_node_backed_by_real_pxm() reports false for @nid,
 * false otherwise.
 */
bool cxl_need_node_perf_attrs_update(int nid)
{
	bool real_pxm = acpi_node_backed_by_real_pxm(nid);

	return !real_pxm;
}
+1 −0
Original line number Diff line number Diff line
@@ -92,5 +92,6 @@ long cxl_pci_get_latency(struct pci_dev *pdev);

int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
				       enum access_coordinate_class access);
/* Returns true when acpi_node_backed_by_real_pxm() is false for @nid. */
bool cxl_need_node_perf_attrs_update(int nid);

#endif /* __CXL_CORE_H__ */
+6 −1
Original line number Diff line number Diff line
@@ -2279,7 +2279,12 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		if (cxlr->coord[i].read_bandwidth) {
			rc = 0;
			if (cxl_need_node_perf_attrs_update(nid))
				node_set_perf_attrs(nid, &cxlr->coord[i], i);
			else
				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);

			if (rc == 0)
				cset++;
		}
Loading