Commit 030f8803 authored by Dan Williams's avatar Dan Williams
Browse files

cxl/region: Fix region setup/teardown for RCDs



RCDs (CXL memory devices that link train without VH capability and show
up as root complex integrated endpoints), hide the presence of the link
between the endpoint and the host-bridge. The CXL region setup/teardown
paths assume that a link hop is present and go looking for at least one
'struct cxl_port' instance between the CXL root port-object and an
endpoint port-object leading to crashes of the form:

    BUG: kernel NULL pointer dereference, address: 0000000000000008
    [..]
    RIP: 0010:cxl_region_setup_targets+0x3e9/0xae0 [cxl_core]
    [..]
    Call Trace:
     <TASK>
     cxl_region_attach+0x46c/0x7a0 [cxl_core]
     cxl_create_region+0x20b/0x270 [cxl_core]
     cxl_mock_mem_probe+0x641/0x800 [cxl_mock_mem]
     platform_probe+0x5b/0xb0

Detect RCDs explicitly and skip walking the non-existent port hierarchy
between root and endpoint in that case.

While this has been a problem since:

commit 0a19bfc8 ("cxl/port: Add RCD endpoint port enumeration")

...it becomes a more reliable crash scenario with the new autodiscovery
implementation.

Fixes: a32320b7 ("cxl/region: Add region autodiscovery")
Reviewed-by: default avatarIra Weiny <ira.weiny@intel.com>
Reviewed-by: default avatarDave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/168002858268.50647.728091521032131326.stgit@dwillia2-xfh.jf.intel.com


Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent d35b495d
Loading
Loading
Loading
Loading
+27 −1
Original line number Diff line number Diff line
@@ -134,9 +134,13 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
		struct cxl_endpoint_decoder *cxled = p->targets[i];
		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
		struct cxl_port *iter = cxled_to_port(cxled);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct cxl_ep *ep;
		int rc = 0;

		if (cxlds->rcd)
			goto endpoint_reset;

		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
			iter = to_cxl_port(iter->dev.parent);

@@ -153,6 +157,7 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
				return rc;
		}

endpoint_reset:
		rc = cxled->cxld.reset(&cxled->cxld);
		if (rc)
			return rc;
@@ -1199,6 +1204,7 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_dev_state *cxlds;
	struct cxl_memdev *cxlmd;
	struct cxl_port *iter;
	struct cxl_ep *ep;
@@ -1214,6 +1220,10 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
	for (i = 0; i < p->nr_targets; i++) {
		cxled = p->targets[i];
		cxlmd = cxled_to_memdev(cxled);
		cxlds = cxlmd->cxlds;

		if (cxlds->rcd)
			continue;

		iter = cxled_to_port(cxled);
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1229,14 +1239,24 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
{
	struct cxl_region_params *p = &cxlr->params;
	struct cxl_endpoint_decoder *cxled;
	struct cxl_dev_state *cxlds;
	int i, rc, rch = 0, vh = 0;
	struct cxl_memdev *cxlmd;
	struct cxl_port *iter;
	struct cxl_ep *ep;
	int i, rc;

	for (i = 0; i < p->nr_targets; i++) {
		cxled = p->targets[i];
		cxlmd = cxled_to_memdev(cxled);
		cxlds = cxlmd->cxlds;

		/* validate that all targets agree on topology */
		if (!cxlds->rcd) {
			vh++;
		} else {
			rch++;
			continue;
		}

		iter = cxled_to_port(cxled);
		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
@@ -1256,6 +1276,12 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
		}
	}

	if (rch && vh) {
		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
		cxl_region_teardown_targets(cxlr);
		return -ENXIO;
	}

	return 0;
}