Commit d278b098 authored by Ilpo Järvinen's avatar Ilpo Järvinen Committed by Bjorn Helgaas
Browse files

thermal: Add PCIe cooling driver

Add a thermal cooling driver to provide a path to access the PCIe bandwidth
controller using the usual thermal interfaces.

A cooling device is instantiated for controllable PCIe Ports from the
bwctrl service driver.

If registering the cooling device fails, allow bwctrl's probe to succeed
regardless. As cdev in that case contains an IS_ERR() pseudo "pointer", clean
that up inside the probe function so the remove side doesn't need to
suddenly make an odd-looking IS_ERR() check.

The thermal side state 0 means no throttling, i.e., maximum supported PCIe
Link Speed.

Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com


Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: dropped data->cdev test per
https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # From the cooling device interface perspective
parent de9a6c8d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -17938,6 +17938,8 @@ M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
L:	linux-pci@vger.kernel.org
S:	Supported
F:	drivers/pci/pcie/bwctrl.c
F:	drivers/thermal/pcie_cooling.c
F:	include/linux/pci-bwctrl.h
PCIE DRIVER FOR AMAZON ANNAPURNA LABS
M:	Jonathan Chocron <jonnyc@amazon.com>
+12 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-bwctrl.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -38,10 +39,12 @@
 * struct pcie_bwctrl_data - PCIe bandwidth controller
 * @set_speed_mutex:	Serializes link speed changes
 * @lbms_count:		Count for LBMS (since last reset)
 * @cdev:		Thermal cooling device associated with the port
 */
struct pcie_bwctrl_data {
	struct mutex set_speed_mutex;
	atomic_t lbms_count;
	/* Assigned in probe; NULL when cooling device registration failed */
	struct thermal_cooling_device *cdev;
};

/*
@@ -314,11 +317,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)

	pci_dbg(port, "enabled with IRQ %d\n", srv->irq);

	/* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
	port->link_bwctrl->cdev = pcie_cooling_device_register(port);
	if (IS_ERR(port->link_bwctrl->cdev))
		port->link_bwctrl->cdev = NULL;

	return 0;
}

static void pcie_bwnotif_remove(struct pcie_device *srv)
{
	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;

	pcie_cooling_device_unregister(data->cdev);

	pcie_bwnotif_disable(srv->port);

	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem)
+9 −0
Original line number Diff line number Diff line
@@ -220,6 +220,15 @@ config DEVFREQ_THERMAL

	  If you want this support, you should say Y here.

config PCIE_THERMAL
	bool "PCIe cooling support"
	depends on PCIEPORTBUS
	help
	  This implements a PCIe cooling mechanism through bandwidth reduction
	  for PCIe devices.

	  If you want this support, you should say Y here.

config THERMAL_EMULATION
	bool "Thermal emulation mode support"
	help
+2 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL) += cpuidle_cooling.o
# devfreq cooling
thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o

thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o

obj-$(CONFIG_K3_THERMAL)	+= k3_bandgap.o k3_j72xx_bandgap.o
# platform thermal drivers
obj-y				+= broadcom/
+80 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * PCIe cooling device
 *
 * Copyright (C) 2023-2024 Intel Corporation
 */

#include <linux/build_bug.h>
#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pci-bwctrl.h>
#include <linux/slab.h>
#include <linux/sprintf.h>
#include <linux/thermal.h>

#define COOLING_DEV_TYPE_PREFIX		"PCIe_Port_Link_Speed_"

static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state)
{
	struct pci_dev *port = cdev->devdata;

	/* cooling state 0 is same as the maximum PCIe speed */
	*state = port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT;

	return 0;
}

static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state)
{
	struct pci_dev *port = cdev->devdata;

	/* cooling state 0 is same as the maximum PCIe speed */
	*state = cdev->max_state - (port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT);

	return 0;
}

/*
 * Apply a cooling state to the Port by requesting the matching Link Speed.
 *
 * Cooling state 0 is the same as the maximum PCIe speed; each higher state
 * requests a speed one step lower, down to 2.5 GT/s at cdev->max_state.
 *
 * Returns -EINVAL if @state is beyond cdev->max_state, otherwise the result
 * of pcie_set_target_speed().
 */
static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state)
{
	struct pci_dev *port = cdev->devdata;
	enum pci_bus_speed speed;

	/*
	 * Reject out-of-range states explicitly: the subtraction below is
	 * unsigned and would wrap, and the wrapped value may alias a valid
	 * speed once truncated into enum pci_bus_speed.
	 */
	if (state > cdev->max_state)
		return -EINVAL;

	/* cooling state 0 is same as the maximum PCIe speed */
	speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT;

	return pcie_set_target_speed(port, speed, true);
}

static struct thermal_cooling_device_ops pcie_cooling_ops = {
	.get_max_state = pcie_cooling_get_max_level,
	.get_cur_state = pcie_cooling_get_cur_level,
	.set_cur_state = pcie_cooling_set_cur_level,
};

struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port)
{
	char *name __free(kfree) =
		kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));
	if (!name)
		return ERR_PTR(-ENOMEM);

	return thermal_cooling_device_register(name, port, &pcie_cooling_ops);
}

/*
 * Unregister a cooling device obtained from pcie_cooling_device_register().
 *
 * Thin wrapper that hands @cdev straight to the thermal core.
 *
 * NOTE(review): bwctrl stores NULL in its cdev when registration failed and
 * calls this unconditionally on remove, so this presumably relies on
 * thermal_cooling_device_unregister() tolerating NULL - confirm.
 */
void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
{
	thermal_cooling_device_unregister(cdev);
}

/*
 * For bus_speed <-> state arithmetic
 *
 * The cooling state callbacks convert between states and speeds by adding or
 * subtracting an offset relative to PCIE_SPEED_2_5GT, which is only valid
 * while the PCIe speed constants are consecutive integers. Break the build
 * if that ever stops being true.
 */
static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT);
static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT);
static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT);
static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT);
static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT);

/* Module metadata for the PCIe cooling driver */
MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>");
MODULE_DESCRIPTION("PCIe cooling driver");
Loading