Commit 2be57543 authored by Dave Jiang's avatar Dave Jiang
Browse files

Merge branch 'for-6.19/cxl-misc' into cxl-for-next

Misc patches for CXL 6.19
- Remove incorrect page-allocator quirk section in documentation.
- Remove unused devm_cxl_port_enumerate_dports() function.
- Fix typo in cdat.c code comment.
- Replace use of system_wq with system_percpu_wq
- Add locked decoder support
- Return when generic target updated
- Rename region_res_match_cxl_range() to spa_maps_hpa()
- Clarify comment in spa_maps_hpa()
parents e9a6fb0b 8d27dd0b
Loading
Loading
Loading
Loading
+0 −31
Original line number Diff line number Diff line
@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
will fallback to allocate from :code:`ZONE_NORMAL`.


Zone and Node Quirks
====================
Let's consider a configuration where the local DRAM capacity is largely onlined
into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
CXL capacity has the opposite configuration - all onlined in
:code:`ZONE_MOVABLE`.

Under the default allocation policy, the page allocator will completely skip
:code:`ZONE_MOVABLE` as a valid allocation target.  This is because, as of
Linux v6.15, the page allocator does (approximately) the following: ::

  for (each zone in local_node):

    for (each node in fallback_order):

      attempt_allocation(gfp_flags);

Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
functionally unreachable for direct allocation.  As a result, the only way
for CXL capacity to be used is via `demotion` in the reclaim path.

This configuration also means that if the DRAM node has :code:`ZONE_MOVABLE`
capacity - when that capacity is depleted, the page allocator will actually
prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.

We may wish to invert this priority in future Linux versions.

If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
when the DRAM nodes are depleted. See the reclaim section for more details.


CGroups and CPUSets
===================
Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined
+6 −5
Original line number Diff line number Diff line
@@ -888,12 +888,13 @@ static void hmat_register_target(struct memory_target *target)
	 * Register generic port perf numbers. The nid may not be
	 * initialized and is still NUMA_NO_NODE.
	 */
	mutex_lock(&target_lock);
	scoped_guard(mutex, &target_lock) {
		if (*(u16 *)target->gen_port_device_handle) {
			hmat_update_generic_target(target);
			target->registered = true;
			return;
		}
	}
	mutex_unlock(&target_lock);

	/*
	 * Skip offline nodes. This can happen when memory
+2 −2
Original line number Diff line number Diff line
@@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
		cxl_coordinates_combine(coords, coords, ctx->coord);

		/*
		 * Take the min of the calculated bandwdith and the upstream
		 * Take the min of the calculated bandwidth and the upstream
		 * switch SSLBIS bandwidth if there's a parent switch
		 */
		if (!is_root)
@@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
/**
 * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
 * @cxlr: The region being operated on
 * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
 * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
 */
static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
					struct xarray *input_xa)
+3 −0
Original line number Diff line number Diff line
@@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
		return;

	if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
		return;

	if (port->commit_end == id)
		cxl_port_commit_reap(cxld);
	else
+8 −79
Original line number Diff line number Diff line
@@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");

/*
 * Context handed to the PCI bus walk callback through its opaque data
 * pointer; devm_cxl_port_enumerate_dports() fills in the inputs and reads
 * back the results that match_add_dports() accumulates.
 */
struct cxl_walk_context {
	/* Bus being enumerated; devices whose ->bus differs are ignored */
	struct pci_bus *bus;
	/* CXL port that newly discovered dports are added to */
	struct cxl_port *port;
	/* PCIe port type to match (PCI_EXP_TYPE_ROOT_PORT or PCI_EXP_TYPE_DOWNSTREAM) */
	int type;
	/* Negative errno recorded when adding a dport fails; otherwise left as initialized */
	int error;
	/* Number of dports added successfully so far */
	int count;
};

static int match_add_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
	struct cxl_port *port = ctx->port;
	int type = pci_pcie_type(pdev);
	struct cxl_register_map map;
	struct cxl_dport *dport;
	u32 lnkcap, port_num;
	int rc;

	if (pdev->bus != ctx->bus)
		return 0;
	if (!pci_is_pcie(pdev))
		return 0;
	if (type != ctx->type)
		return 0;
	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
				  &lnkcap))
		return 0;

	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_dbg(&port->dev, "failed to find component registers\n");

	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
	dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
	if (IS_ERR(dport)) {
		ctx->error = PTR_ERR(dport);
		return PTR_ERR(dport);
	}
	ctx->count++;

	return 0;
}

/**
 * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
 * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
 *
 * Walks the PCI bus associated with @port and adds a dport for each
 * matching device: root ports when the bus is a root bus, switch
 * downstream ports otherwise.
 *
 * Returns a positive number of dports enumerated or a negative error
 * code: -ENXIO when @port has no PCI bus, the error recorded while adding
 * a dport, or -ENODEV when no dport was found.
 */
int devm_cxl_port_enumerate_dports(struct cxl_port *port)
{
	struct pci_bus *bus = cxl_port_to_pci_bus(port);
	struct cxl_walk_context ctx;

	if (!bus)
		return -ENXIO;

	ctx = (struct cxl_walk_context) {
		.port = port,
		.bus = bus,
		.type = pci_is_root_bus(bus) ? PCI_EXP_TYPE_ROOT_PORT :
					       PCI_EXP_TYPE_DOWNSTREAM,
	};
	pci_walk_bus(bus, match_add_dports, &ctx);

	/*
	 * Check for a recorded failure first: when adding the very first
	 * dport fails, count is still 0 and testing it first would hide the
	 * real error behind -ENODEV.
	 */
	if (ctx.error)
		return ctx.error;
	if (ctx.count == 0)
		return -ENODEV;
	return ctx.count;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");

static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
	return 0;
}

/*
 * Context passed to PCI device walk callbacks through their opaque data
 * pointer; count_dports() converts that pointer back to this type.
 *
 * NOTE(review): how count_dports() uses the individual fields is not
 * visible in this hunk - confirm against the callback body before
 * documenting them further.
 */
struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};

static int count_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
Loading