Commit 586b5dfb authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cxl fixes from Dave Jiang:

 - Fix index of Clear Event Record handles in cxl_clear_event_record()

 - Fix use before init of map->reg_type in cxl_decode_regblock()

 - Fix initialization of mbox_cmd.size_out in cxl_mem_get_records_log()

 - Fix CXL path access_coordinate computation:
     - Remove unneded check of iter in loop
     - Fix of retrieving of access_coordinate in PCI topology walk
     - Fix of incorrect region access_coordinate data calculation
     - Consolidate of access_coordinates attached to downstream port
       context
     - Add check to validate access_coordinate validity to prevent
       incorrect data being exposed via sysfs

* tag 'cxl-fixes-6.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl: Add checks to access_coordinate calculation to fail missing data
  cxl: Consolidate dport access_coordinate ->hb_coord and ->sw_coord into ->coord
  cxl: Fix incorrect region perf data calculation
  cxl: Fix retrieving of access_coordinates in PCIe path
  cxl: Remove checking of iter in cxl_endpoint_get_perf_coordinates()
  cxl/core: Fix initialization of mbox_cmd.size_out in get event
  cxl/core/regs: Fix usage of map->reg_type in cxl_decode_regblock() before assigned
  cxl/mem: Fix for the index of Clear Event Record Handle
parents 52e5070f 7bcf809b
Loading
Loading
Loading
Loading
+1 −12
Original line number Diff line number Diff line
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
{
	struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
	u32 uid;
	int rc;

	if (kstrtou32(acpi_device_uid(hb), 0, &uid))
		return -EINVAL;

	rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
	if (rc < 0)
		return rc;

	/* Adjust back to picoseconds from nanoseconds */
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		dport->hb_coord[i].read_latency *= 1000;
		dport->hb_coord[i].write_latency *= 1000;
	}

	return 0;
	return acpi_get_genport_coordinates(uid, dport->coord);
}

static int add_host_bridge_dport(struct device *match, void *arg)
+79 −73
Original line number Diff line number Diff line
@@ -14,12 +14,42 @@
struct dsmas_entry {
	struct range dpa_range;
	u8 handle;
	struct access_coordinate coord;
	struct access_coordinate coord[ACCESS_COORDINATE_MAX];

	int entries;
	int qos_class;
};

static u32 cdat_normalize(u16 entry, u64 base, u8 type)
{
	u32 value;

	/*
	 * Check for invalid and overflow values
	 */
	if (entry == 0xffff || !entry)
		return 0;
	else if (base > (UINT_MAX / (entry)))
		return 0;

	/*
	 * CDAT fields follow the format of HMAT fields. See table 5 Device
	 * Scoped Latency and Bandwidth Information Structure in Coherent Device
	 * Attribute Table (CDAT) Specification v1.01.
	 */
	value = entry * base;
	switch (type) {
	case ACPI_HMAT_ACCESS_LATENCY:
	case ACPI_HMAT_READ_LATENCY:
	case ACPI_HMAT_WRITE_LATENCY:
		value = DIV_ROUND_UP(value, 1000);
		break;
	default:
		break;
	}
	return value;
}

static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
			      const unsigned long end)
{
@@ -58,7 +88,7 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
	return 0;
}

static void cxl_access_coordinate_set(struct access_coordinate *coord,
static void __cxl_access_coordinate_set(struct access_coordinate *coord,
					int access, unsigned int val)
{
	switch (access) {
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
	}
}

static void cxl_access_coordinate_set(struct access_coordinate *coord,
				      int access, unsigned int val)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		__cxl_access_coordinate_set(&coord[i], access, val);
}

static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
			       const unsigned long end)
{
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
	__le16 le_val;
	u64 val;
	u16 len;
	int rc;

	len = le16_to_cpu((__force __le16)hdr->length);
	if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,

	le_base = (__force __le64)dslbis->entry_base_unit;
	le_val = (__force __le16)dslbis->entry[0];
	rc = check_mul_overflow(le64_to_cpu(le_base),
				le16_to_cpu(le_val), &val);
	if (rc)
		pr_warn("DSLBIS value overflowed.\n");
	val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
			     dslbis->data_type);

	cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
	cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);

	return 0;
}
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
static int cxl_port_perf_data_calculate(struct cxl_port *port,
					struct xarray *dsmas_xa)
{
	struct access_coordinate ep_c;
	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
	struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
	struct dsmas_entry *dent;
	int valid_entries = 0;
	unsigned long index;
	int rc;

	rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
	rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
	if (rc) {
		dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
		return rc;
	}

	rc = cxl_hb_get_perf_coordinates(port, coord);
	if (rc)  {
		dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
		return rc;
	}

	struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);

	if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
	xa_for_each(dsmas_xa, index, dent) {
		int qos_class;

		cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
		/*
		 * Keeping the host bridge coordinates separate from the dsmas
		 * coordinates in order to allow calculation of access class
		 * 0 and 1 for region later.
		 */
		cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
					&coord[ACCESS_COORDINATE_CPU],
					&dent->coord);
		cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
		dent->entries = 1;
		rc = cxl_root->ops->qos_class(cxl_root,
					      &coord[ACCESS_COORDINATE_CPU],
					      &dent->coord[ACCESS_COORDINATE_CPU],
					      1, &qos_class);
		if (rc != 1)
			continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
			      struct cxl_dpa_perf *dpa_perf)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		dpa_perf->coord[i] = dent->coord[i];
	dpa_perf->dpa_range = dent->dpa_range;
	dpa_perf->coord = dent->coord;
	dpa_perf->qos_class = dent->qos_class;
	dev_dbg(dev,
		"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
		dent->dpa_range.start, dpa_perf->qos_class,
		dent->coord.read_bandwidth, dent->coord.write_bandwidth,
		dent->coord.read_latency, dent->coord.write_latency);
		dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
		dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
		dent->coord[ACCESS_COORDINATE_CPU].read_latency,
		dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}

static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,19 +483,18 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,

		le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
		le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;

		if (check_mul_overflow(le64_to_cpu(le_base),
				       le16_to_cpu(le_val), &val))
			dev_warn(dev, "SSLBIS value overflowed!\n");
		val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
				     sslbis->data_type);

		xa_for_each(&port->dports, index, dport) {
			if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
			    dsp_id == dport->port_id)
				cxl_access_coordinate_set(&dport->sw_coord,
			    dsp_id == dport->port_id) {
				cxl_access_coordinate_set(dport->coord,
							  sslbis->data_type,
							  val);
			}
		}
	}

	return 0;
}
@@ -493,14 +514,7 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);

/**
 * cxl_coordinates_combine - Combine the two input coordinates
 *
 * @out: Output coordinate of c1 and c2 combined
 * @c1: input coordinates
 * @c2: input coordinates
 */
void cxl_coordinates_combine(struct access_coordinate *out,
static void __cxl_coordinates_combine(struct access_coordinate *out,
				      struct access_coordinate *c1,
				      struct access_coordinate *c2)
{
@@ -515,23 +529,34 @@ void cxl_coordinates_combine(struct access_coordinate *out,
		out->read_latency = c1->read_latency + c2->read_latency;
}

/**
 * cxl_coordinates_combine - Combine the two input coordinates
 *
 * @out: Output coordinate of c1 and c2 combined
 * @c1: input coordinates
 * @c2: input coordinates
 */
void cxl_coordinates_combine(struct access_coordinate *out,
			     struct access_coordinate *c1,
			     struct access_coordinate *c2)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		__cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}

MODULE_IMPORT_NS(CXL);

void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
				    struct cxl_endpoint_decoder *cxled)
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_port *port = cxlmd->endpoint;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
	struct access_coordinate coord;
	struct range dpa = {
			.start = cxled->dpa_res->start,
			.end = cxled->dpa_res->end,
	};
	struct cxl_dpa_perf *perf;
	int rc;

	switch (cxlr->mode) {
	case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
	if (!range_contains(&perf->dpa_range, &dpa))
		return;

	rc = cxl_hb_get_perf_coordinates(port, hb_coord);
	if (rc)  {
		dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
		return;
	}

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		/* Pickup the host bridge coords */
		cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);

		/* Get total bandwidth and the worst latency for the cxl region */
		cxlr->coord[i].read_latency = max_t(unsigned int,
						    cxlr->coord[i].read_latency,
						    coord.read_latency);
						    perf->coord[i].read_latency);
		cxlr->coord[i].write_latency = max_t(unsigned int,
						     cxlr->coord[i].write_latency,
						     coord.write_latency);
		cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
		cxlr->coord[i].write_bandwidth += coord.write_bandwidth;

		/*
		 * Convert latency to nanosec from picosec to be consistent
		 * with the resulting latency coordinates computed by the
		 * HMAT_REPORTING code.
		 */
		cxlr->coord[i].read_latency =
			DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
		cxlr->coord[i].write_latency =
			DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
						     perf->coord[i].write_latency);
		cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
		cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
	}
}

+3 −2
Original line number Diff line number Diff line
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,

		payload->handles[i++] = gen->hdr.handle;
		dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
			le16_to_cpu(payload->handles[i]));
			le16_to_cpu(payload->handles[i - 1]));

		if (i == max_handles) {
			payload->nr_recs = i;
@@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
		.payload_in = &log_type,
		.size_in = sizeof(log_type),
		.payload_out = payload,
		.size_out = mds->payload_size,
		.min_out = struct_size(payload, records, 0),
	};

	do {
		int rc, i;

		mbox_cmd.size_out = mds->payload_size;

		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
		if (rc) {
			dev_err_ratelimited(dev,
+69 −45
Original line number Diff line number Diff line
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);

/**
 * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
 *				 and host bridge
 *
 * @port: endpoint cxl_port
 * @coord: output access coordinates
 *
 * Return: errno on failure, 0 on success.
 */
int cxl_hb_get_perf_coordinates(struct cxl_port *port,
				struct access_coordinate *coord)
static void add_latency(struct access_coordinate *c, long latency)
{
	struct cxl_port *iter = port;
	struct cxl_dport *dport;
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		c[i].write_latency += latency;
		c[i].read_latency += latency;
	}
}

	if (!is_cxl_endpoint(port))
		return -EINVAL;
static bool coordinates_valid(struct access_coordinate *c)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		if (c[i].read_bandwidth && c[i].write_bandwidth &&
		    c[i].read_latency && c[i].write_latency)
			continue;
		return false;
	}

	dport = iter->parent_dport;
	while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
		iter = to_cxl_port(iter->dev.parent);
		dport = iter->parent_dport;
	return true;
}

	coord[ACCESS_COORDINATE_LOCAL] =
		dport->hb_coord[ACCESS_COORDINATE_LOCAL];
	coord[ACCESS_COORDINATE_CPU] =
		dport->hb_coord[ACCESS_COORDINATE_CPU];
static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
		c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
	}
}

	return 0;
static void set_access_coordinates(struct access_coordinate *out,
				   struct access_coordinate *in)
{
	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
		out[i] = in[i];
}

static bool parent_port_is_cxl_root(struct cxl_port *port)
{
	return is_cxl_root(to_cxl_port(port->dev.parent));
}

/**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
				      struct access_coordinate *coord)
{
	struct access_coordinate c = {
	struct access_coordinate c[] = {
		{
			.read_bandwidth = UINT_MAX,
			.write_bandwidth = UINT_MAX,
		},
		{
			.read_bandwidth = UINT_MAX,
			.write_bandwidth = UINT_MAX,
		},
	};
	struct cxl_port *iter = port;
	struct cxl_dport *dport;
	struct pci_dev *pdev;
	unsigned int bw;
	bool is_cxl_root;

	if (!is_cxl_endpoint(port))
		return -EINVAL;

	/*
	 * Exit the loop when the parent port of the current iter port is cxl
	 * root. The iterative loop starts at the endpoint and gathers the
	 * latency of the CXL link from the current device/port to the connected
	 * downstream port each iteration.
	 */
	do {
		dport = iter->parent_dport;
		iter = to_cxl_port(iter->dev.parent);
		is_cxl_root = parent_port_is_cxl_root(iter);

		/*
	 * Exit the loop when the parent port of the current port is cxl root.
	 * The iterative loop starts at the endpoint and gathers the
	 * latency of the CXL link from the current iter to the next downstream
	 * port each iteration. If the parent is cxl root then there is
	 * nothing to gather.
		 * There's no valid access_coordinate for a root port since RPs do not
		 * have CDAT and therefore needs to be skipped.
		 */
	while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
		cxl_coordinates_combine(&c, &c, &dport->sw_coord);
		c.write_latency += dport->link_latency;
		c.read_latency += dport->link_latency;
		if (!is_cxl_root) {
			if (!coordinates_valid(dport->coord))
				return -EINVAL;
			cxl_coordinates_combine(c, c, dport->coord);
		}
		add_latency(c, dport->link_latency);
	} while (!is_cxl_root);

		iter = to_cxl_port(iter->dev.parent);
	dport = iter->parent_dport;
	}
	/* Retrieve HB coords */
	if (!coordinates_valid(dport->coord))
		return -EINVAL;
	cxl_coordinates_combine(c, c, dport->coord);

	/* Get the calculated PCI paths bandwidth */
	pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
		return -ENXIO;
	bw /= BITS_PER_BYTE;

	c.write_bandwidth = min(c.write_bandwidth, bw);
	c.read_bandwidth = min(c.read_bandwidth, bw);

	*coord = c;
	set_min_bandwidth(c, bw);
	set_access_coordinates(coord, c);

	return 0;
}
+3 −2
Original line number Diff line number Diff line
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
				struct cxl_register_map *map)
{
	u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
	int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
	u64 offset = ((u64)reg_hi << 32) |
		     (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
	if (offset > pci_resource_len(pdev, bar)) {
		dev_warn(&pdev->dev,
			 "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
			 &pdev->resource[bar], &offset, map->reg_type);
			 &pdev->resource[bar], &offset, reg_type);
		return false;
	}

	map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
	map->reg_type = reg_type;
	map->resource = pci_resource_start(pdev, bar) + offset;
	map->max_size = pci_resource_len(pdev, bar) - offset;
	return true;
Loading