Unverified Commit 0e414bf7 authored by Raag Jadav's avatar Raag Jadav Committed by Rodrigo Vivi
Browse files

drm/xe: Expose PCIe link downgrade attributes



Expose sysfs attributes for PCIe link downgrade capability and status.

v2: Move from debugfs to sysfs (Lucas, Rodrigo, Badal)
    Rework macros and their naming (Rodrigo)
v3: Use sysfs_create_files() (Riana)
    Fix checkpatch warning (Riana)
v4: s/downspeed/downgrade (Lucas, Rodrigo, Riana)
v5: Use PCIe Gen agnostic naming (Rodrigo)
v6: s/pcie_gen/auto_link (Lucas)

Signed-off-by: default avatarRaag Jadav <raag.jadav@intel.com>
Reviewed-by: default avatarRiana Tauro <riana.tauro@intel.com>
Link: https://lore.kernel.org/r/20250506054835.3395220-3-raag.jadav@intel.com


Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent f3e875b3
Loading
Loading
Loading
Loading
+91 −2
Original line number Diff line number Diff line
@@ -3,14 +3,16 @@
 * Copyright © 2023 Intel Corporation
 */

#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/pci.h>
#include <linux/sysfs.h>

#include <drm/drm_managed.h>

#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_mmio.h"
#include "xe_pcode_api.h"
#include "xe_pcode.h"
#include "xe_pm.h"

/**
@@ -63,12 +65,93 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,

static DEVICE_ATTR_RW(vram_d3cold_threshold);

/**
 * DOC: PCIe Gen5 Limitations
 *
 * Default link speed of discrete GPUs is determined by configuration parameters
 * stored in their flash memory, which are subject to override through user
 * initiated firmware updates. It has been observed that devices configured with
 * PCIe Gen5 as their default link speed can come across link quality issues due
 * to host or motherboard limitations and may have to auto-downgrade their link
 * to PCIe Gen4 speed when faced with unstable link at Gen5, which makes
 * firmware updates rather risky on such setups. It is required to ensure that
 * the device is capable of auto-downgrading its link to PCIe Gen4 speed before
 * pushing the firmware image with PCIe Gen5 as default configuration. This can
 * be done by reading ``auto_link_downgrade_capable`` sysfs entry, which will
 * denote if the device is capable of auto-downgrading its link to PCIe Gen4
 * speed with boolean output value of ``0`` or ``1``, meaning `incapable` or
 * `capable` respectively.
 *
 * .. code-block:: shell
 *
 *    $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable
 *
 * Pushing the firmware image with PCIe Gen5 as default configuration on a auto
 * link downgrade incapable device and facing link instability due to host or
 * motherboard limitations can result in driver failing to bind to the device,
 * making further firmware updates impossible with RMA being the only last
 * resort.
 *
 * Link downgrade status of auto link downgrade capable devices is available
 * through ``auto_link_downgrade_status`` sysfs entry with boolean output value
 * of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during
 * link training (which is the optimal scenario) and ``1`` means the device has
 * auto-downgraded its link to PCIe Gen4 speed due to unstable Gen5 link.
 *
 * .. code-block:: shell
 *
 *    $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status
 */

static ssize_t
auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct xe_device *xe = pdev_to_xe_device(pdev);
	u32 cap, val;

	xe_pm_runtime_get(xe);
	val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP);
	xe_pm_runtime_put(xe);

	cap = REG_FIELD_GET(LINK_DOWNGRADE, val);
	return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false);
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable);

static ssize_t
auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct xe_device *xe = pdev_to_xe_device(pdev);
	u32 val;
	int ret;

	xe_pm_runtime_get(xe);
	ret = xe_pcode_read(xe_device_get_root_tile(xe),
			    PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0),
			    &val, NULL);
	xe_pm_runtime_put(xe);

	return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val));
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status);

static const struct attribute *auto_link_downgrade_attrs[] = {
	&dev_attr_auto_link_downgrade_capable.attr,
	&dev_attr_auto_link_downgrade_status.attr,
	NULL
};

static void xe_device_sysfs_fini(void *arg)
{
	struct xe_device *xe = arg;

	if (xe->d3cold.capable)
		sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);

	if (xe->info.platform == XE_BATTLEMAGE)
		sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs);
}

int xe_device_sysfs_init(struct xe_device *xe)
@@ -82,5 +165,11 @@ int xe_device_sysfs_init(struct xe_device *xe)
			return ret;
	}

	if (xe->info.platform == XE_BATTLEMAGE) {
		ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs);
		if (ret)
			return ret;
	}

	return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
}
+5 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#define   DGFX_PCODE_STATUS		0x7E
#define     DGFX_GET_INIT_STATUS	0x0
#define     DGFX_INIT_STATUS_COMPLETE	0x1
#define     DGFX_LINK_DOWNGRADE_STATUS	REG_BIT(31)

#define   PCODE_POWER_SETUP			0x7C
#define     POWER_SETUP_SUBCOMMAND_READ_I1	0x4
@@ -66,6 +67,10 @@
/* Auxiliary info bits */
#define   AUXINFO_HISTORY_OFFSET	REG_GENMASK(31, 29)

#define BMG_PCIE_CAP			XE_REG(0x138340)
#define   LINK_DOWNGRADE		REG_GENMASK(1, 0)
#define     DOWNGRADE_CAPABLE		2

struct pcode_err_decode {
	int errno;
	const char *str;