Merge tag 'cxl-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl (5797d10e) · Commits · git / linux-net

Documentation/ABI/testing/sysfs-bus-cxl

+10 −1

Original line number	Diff line number	Diff line
		@@ -496,8 +496,17 @@ Description:
		changed, only freed by writing 0. The kernel makes no guarantees
		that data is maintained over an address space freeing event, and
		there is no guarantee that a free followed by an allocate
		results in the same address being allocated.
		results in the same address being allocated. If extended linear
		cache is present, the size indicates extended linear cache size
		plus the CXL region size.

		What: /sys/bus/cxl/devices/regionZ/extended_linear_cache_size
		Date: October, 2025
		KernelVersion: v6.19
		Contact: linux-cxl@vger.kernel.org
		Description:
		(RO) The size of the extended linear cache, if an extended
		linear cache is present. Otherwise the attribute will not be visible.

		What: /sys/bus/cxl/devices/regionZ/mode
		Date: January, 2023

Documentation/driver-api/cxl/allocation/page-allocator.rst

+0 −31

Original line number	Diff line number	Diff line
		@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
		will fallback to allocate from :code:`ZONE_NORMAL`.


		Zone and Node Quirks
		====================
		Let's consider a configuration where the local DRAM capacity is largely onlined
		into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
		CXL capacity has the opposite configuration - all onlined in
		:code:`ZONE_MOVABLE`.

		Under the default allocation policy, the page allocator will completely skip
		:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of
		Linux v6.15, the page allocator does (approximately) the following: ::

		for (each zone in local_node):

		for (each node in fallback_order):

		attempt_allocation(gfp_flags);

		Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
		functionally unreachable for direct allocation. As a result, the only way
		for CXL capacity to be used is via `demotion` in the reclaim path.

		This configuration also means that if the DRAM node has :code:`ZONE_MOVABLE`
		capacity - when that capacity is depleted, the page allocator will actually
		prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.

		We may wish to invert this priority in future Linux versions.

		If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
		when the DRAM nodes are depleted. See the reclaim section for more details.


		CGroups and CPUSets
		===================
		Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined

drivers/acpi/numa/hmat.c

+6 −5

Original line number	Diff line number	Diff line
		@@ -910,12 +910,13 @@ static void hmat_register_target(struct memory_target *target)
		* Register generic port perf numbers. The nid may not be
		* initialized and is still NUMA_NO_NODE.
		*/
		mutex_lock(&target_lock);
		scoped_guard(mutex, &target_lock) {
		if ((u16 )target->gen_port_device_handle) {
		hmat_update_generic_target(target);
		target->registered = true;
		return;
		}
		}
		mutex_unlock(&target_lock);

		hmat_hotplug_target(target);
		}

drivers/cxl/acpi.c

+41 −32

Original line number	Diff line number	Diff line
		@@ -11,25 +11,36 @@
		#include "cxlpci.h"
		#include "cxl.h"

		/*
		 * XOR map data attached to a root decoder (stored in
		 * cxlrd->platform_data by the code in this file).
		 *
		 * @nr_maps: number of valid entries in @xormaps; also the bound the
		 *           compiler uses for @xormaps via __counted_by()
		 * @xormaps: flexible array of 64-bit xormap values, one per map
		 */
		struct cxl_cxims_data {
		int nr_maps;
		u64 xormaps[] __counted_by(nr_maps);
		};

		/*
		 * File-local GUID constant built with GUID_INIT().
		 * NOTE(review): going by the name, this presumably identifies the ACPI
		 * method used to look up CXL QTG IDs; its users are outside this view,
		 * so confirm against the caller.
		 */
		static const guid_t acpi_cxl_qtg_id_guid =
		GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
		0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);

		static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
		/*
		 * Lookup table indexed by host bridge interleave ways (hbiw) that gives
		 * the number of xormaps to apply for that interleave. Each listed value
		 * is the power-of-two exponent of its index (e.g. 6 = 2^1 * 3 -> 1,
		 * 12 = 2^2 * 3 -> 2), so ways of 1 and 3 need no xormaps.
		 *
		 * The table is sized so that any index up to
		 * CXL_DECODER_MAX_INTERLEAVE is in bounds. Indices that are not listed
		 * are implicitly zero-initialized, which is indistinguishable from a
		 * legitimate "0 maps" entry; validate hbiw against valid_hbiw[] before
		 * indexing.
		 */
		#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
		static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
		[1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
		};

		/* The only hbiw values accepted when indexing hbiw_to_nr_maps[]. */
		static const int valid_hbiw[] = { 1, 2, 3, 4, 6, 8, 12, 16 };

		u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw)
		{
		struct cxl_cxims_data *cximsd = cxlrd->platform_data;
		int hbiw = cxlrd->cxlsd.nr_targets;
		int nr_maps_to_apply = -1;
		u64 val;
		int pos;

		/* No xormaps for host bridge interleave ways of 1 or 3 */
		if (hbiw == 1 \|\| hbiw == 3)
		return addr;
		/*
		* Strictly validate hbiw since this function is used for testing and
		* that nullifies any expectation of trusted parameters from the CXL
		* Region Driver.
		*/
		for (int i = 0; i < ARRAY_SIZE(valid_hbiw); i++) {
		if (valid_hbiw[i] == hbiw) {
		nr_maps_to_apply = hbiw_to_nr_maps[hbiw];
		break;
		}
		}
		if (nr_maps_to_apply == -1 \|\| nr_maps_to_apply > cximsd->nr_maps)
		return ULLONG_MAX;

		/*
		* In regions using XOR interleave arithmetic the CXL HPA may not
		@@ -60,6 +71,14 @@ static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)

		return addr;
		}
		EXPORT_SYMBOL_FOR_MODULES(cxl_do_xormap_calc, "cxl_translate");

		/*
		 * cxl_apply_xor_maps - apply a root decoder's xormaps to an address
		 * @cxlrd: root decoder; its platform_data is used as the
		 *         struct cxl_cxims_data holding the xormaps
		 * @addr: address to translate
		 *
		 * Thin wrapper around cxl_do_xormap_calc() that passes the decoder's
		 * target count (cxlsd.nr_targets) as the host bridge interleave ways.
		 *
		 * Return: whatever cxl_do_xormap_calc() returns for these inputs.
		 */
		static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
		{
		struct cxl_cxims_data *cximsd = cxlrd->platform_data;

		return cxl_do_xormap_calc(cximsd, addr, cxlrd->cxlsd.nr_targets);
		}

		struct cxl_cxims_context {
		struct device *dev;
		@@ -353,7 +372,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd)

		rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size);
		if (rc)
		return rc;
		return 0;

		/*
		* The cache range is expected to be within the CFMWS.
		@@ -378,22 +397,19 @@ static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd)
		int rc;

		rc = cxl_acpi_set_cache_size(cxlrd);
		if (!rc)
		return;

		if (rc != -EOPNOTSUPP) {
		if (rc) {
		/*
		* Failing to support extended linear cache region resize does not
		* Failing to retrieve extended linear cache region resize does not
		* prevent the region from functioning. Only causes cxl list showing
		* incorrect region size.
		*/
		dev_warn(cxlrd->cxlsd.cxld.dev.parent,
		"Extended linear cache calculation failed rc:%d\n", rc);
		}
		"Extended linear cache retrieval failed rc:%d\n", rc);

		/* Ignoring return code */
		cxlrd->cache_size = 0;
		}
		}

		DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *,
		if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev))
		@@ -453,8 +469,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
		ig = CXL_DECODER_MIN_GRANULARITY;
		cxld->interleave_granularity = ig;

		cxl_setup_extended_linear_cache(cxlrd);

		if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
		if (ways != 1 && ways != 3) {
		cxims_ctx = (struct cxl_cxims_context) {
		@@ -470,18 +484,13 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
		return -EINVAL;
		}
		}
		cxlrd->ops.hpa_to_spa = cxl_apply_xor_maps;
		cxlrd->ops.spa_to_hpa = cxl_apply_xor_maps;
		}

		cxlrd->qos_class = cfmws->qtg_id;

		if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
		cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL);
		if (!cxlrd->ops)
		return -ENOMEM;
		cxl_setup_extended_linear_cache(cxlrd);

		cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps;
		cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps;
		}
		cxlrd->qos_class = cfmws->qtg_id;

		rc = cxl_decoder_add(cxld);
		if (rc)

drivers/cxl/core/cdat.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -826,7 +826,7 @@ static struct xarray cxl_switch_gather_bandwidth(struct cxl_region cxlr,
		cxl_coordinates_combine(coords, coords, ctx->coord);

		/*
		* Take the min of the calculated bandwdith and the upstream
		* Take the min of the calculated bandwidth and the upstream
		* switch SSLBIS bandwidth if there's a parent switch
		*/
		if (!is_root)
		@@ -949,7 +949,7 @@ static struct xarray cxl_hb_gather_bandwidth(struct xarray xa)
		/**
		* cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
		* @cxlr: The region being operated on
		* @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
		* @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
		*/
		static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
		struct xarray *input_xa)