Commit 2899aa39 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_cache_for_v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 resource control updates from Borislav Petkov:

 - First part of the MPAM work: split the architectural part of resctrl
   from the filesystem part so that ARM's MPAM varian of resource
   control can be added later while sharing the user interface with x86
   (James Morse)

* tag 'x86_cache_for_v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (30 commits)
  x86/resctrl: Move get_{mon,ctrl}_domain_from_cpu() to live with their callers
  x86/resctrl: Move get_config_index() to a header
  x86/resctrl: Handle throttle_mode for SMBA resources
  x86/resctrl: Move RFTYPE flags to be managed by resctrl
  x86/resctrl: Make resctrl_arch_pseudo_lock_fn() take a plr
  x86/resctrl: Make prefetch_disable_bits belong to the arch code
  x86/resctrl: Allow an architecture to disable pseudo lock
  x86/resctrl: Add resctrl_arch_ prefix to pseudo lock functions
  x86/resctrl: Move mbm_cfg_mask to struct rdt_resource
  x86/resctrl: Move mba_mbps_default_event init to filesystem code
  x86/resctrl: Change mon_event_config_{read,write}() to be arch helpers
  x86/resctrl: Add resctrl_arch_is_evt_configurable() to abstract BMEC
  x86/resctrl: Move the is_mbm_*_enabled() helpers to asm/resctrl.h
  x86/resctrl: Rewrite and move the for_each_*_rdt_resource() walkers
  x86/resctrl: Move monitor init work to a resctrl init call
  x86/resctrl: Move monitor exit work to a resctrl exit call
  x86/resctrl: Add an arch helper to reset one resource
  x86/resctrl: Move resctrl types to a separate header
  x86/resctrl: Move rdt_find_domain() to be visible to arch and fs code
  x86/resctrl: Expose resctrl fs's init function to the rest of the kernel
  ...
parents 90617477 823beb31
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -19934,6 +19934,7 @@ S: Supported
F:	Documentation/arch/x86/resctrl*
F:	arch/x86/include/asm/resctrl.h
F:	arch/x86/kernel/cpu/resctrl/
F:	include/linux/resctrl*.h
F:	tools/testing/selftests/resctrl/
READ-COPY UPDATE (RCU)
+7 −0
Original line number Diff line number Diff line
@@ -498,6 +498,7 @@ config X86_CPU_RESCTRL
	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
	select KERNFS
	select PROC_CPU_RESCTRL		if PROC_FS
	select RESCTRL_FS_PSEUDO_LOCK
	help
	  Enable x86 CPU resource control support.

@@ -514,6 +515,12 @@ config X86_CPU_RESCTRL

	  Say N if unsure.

config RESCTRL_FS_PSEUDO_LOCK
	bool
	help
	  Software mechanism to pin data in a cache portion using
	  micro-architecture specific knowledge.

config X86_FRED
	bool "Flexible Return and Event Delivery"
	depends on X86_64
+33 −3
Original line number Diff line number Diff line
@@ -4,8 +4,10 @@

#ifdef CONFIG_X86_CPU_RESCTRL

#include <linux/sched.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/resctrl_types.h>
#include <linux/sched.h>

/*
 * This value can never be a valid CLOSID, and is used when mapping a
@@ -40,6 +42,7 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);

extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;

DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -79,6 +82,21 @@ static inline void resctrl_arch_disable_mon(void)
	static_branch_dec_cpuslocked(&rdt_enable_key);
}

static inline bool resctrl_arch_is_llc_occupancy_enabled(void)
{
	return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
}

static inline bool resctrl_arch_is_mbm_total_enabled(void)
{
	return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
}

static inline bool resctrl_arch_is_mbm_local_enabled(void)
{
	return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
}

/*
 * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
 *
@@ -96,8 +114,8 @@ static inline void resctrl_arch_disable_mon(void)
static inline void __resctrl_sched_in(struct task_struct *tsk)
{
	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
	u32 closid = state->default_closid;
	u32 rmid = state->default_rmid;
	u32 closid = READ_ONCE(state->default_closid);
	u32 rmid = READ_ONCE(state->default_rmid);
	u32 tmp;

	/*
@@ -132,6 +150,13 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
	return val * scale;
}

static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid,
							    u32 rmid)
{
	WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid);
	WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid);
}

static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
						u32 closid, u32 rmid)
{
@@ -178,6 +203,11 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid
static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
					     void *ctx) { };

u64 resctrl_arch_get_prefetch_disable_bits(void);
int resctrl_arch_pseudo_lock_fn(void *_plr);
int resctrl_arch_measure_cycles_lat_fn(void *_plr);
int resctrl_arch_measure_l2_residency(void *_plr);
int resctrl_arch_measure_l3_residency(void *_plr);
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
+3 −2
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_X86_CPU_RESCTRL)		+= core.o rdtgroup.o monitor.o
obj-$(CONFIG_X86_CPU_RESCTRL)	+= ctrlmondata.o pseudo_lock.o
obj-$(CONFIG_X86_CPU_RESCTRL)		+= ctrlmondata.o
obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK)	+= pseudo_lock.o
CFLAGS_pseudo_lock.o = -I$(src)
+48 −133
Original line number Diff line number Diff line
@@ -43,12 +43,6 @@ static DEFINE_MUTEX(domain_list_lock);
 */
DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format
 */
int max_name_width, max_data_width;

/*
 * Global boolean for rdt_alloc which is true if any
 * resource allocation is enabled.
@@ -62,7 +56,7 @@ static void mba_wrmsr_amd(struct msr_param *m);
#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

struct rdt_hw_resource rdt_resources_all[] = {
struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
	[RDT_RESOURCE_L3] =
	{
		.r_resctrl = {
@@ -72,9 +66,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
			.mon_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L3),
			.mon_domains		= mon_domain_init(RDT_RESOURCE_L3),
			.parse_ctrlval		= parse_cbm,
			.format_str		= "%d=%0*x",
			.fflags			= RFTYPE_RES_CACHE,
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L3_CBM_BASE,
		.msr_update		= cat_wrmsr,
@@ -86,9 +78,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
			.name			= "L2",
			.ctrl_scope		= RESCTRL_L2_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_L2),
			.parse_ctrlval		= parse_cbm,
			.format_str		= "%d=%0*x",
			.fflags			= RFTYPE_RES_CACHE,
			.schema_fmt		= RESCTRL_SCHEMA_BITMAP,
		},
		.msr_base		= MSR_IA32_L2_CBM_BASE,
		.msr_update		= cat_wrmsr,
@@ -100,9 +90,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
			.name			= "MB",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_MBA),
			.parse_ctrlval		= parse_bw,
			.format_str		= "%d=%*u",
			.fflags			= RFTYPE_RES_MB,
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
	[RDT_RESOURCE_SMBA] =
@@ -112,9 +100,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
			.name			= "SMBA",
			.ctrl_scope		= RESCTRL_L3_CACHE,
			.ctrl_domains		= ctrl_domain_init(RDT_RESOURCE_SMBA),
			.parse_ctrlval		= parse_bw,
			.format_str		= "%d=%*u",
			.fflags			= RFTYPE_RES_MB,
			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
		},
	},
};
@@ -127,6 +113,14 @@ u32 resctrl_arch_system_num_rmid_idx(void)
	return r->num_rmid;
}

struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l)
{
	if (l >= RDT_NUM_RESOURCES)
		return NULL;

	return &rdt_resources_all[l].r_resctrl;
}

/*
 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
@@ -161,7 +155,6 @@ static inline void cache_alloc_hsw_probe(void)
		return;

	hw_res->num_closid = 4;
	r->default_ctrl = max_cbm;
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
@@ -174,7 +167,7 @@ static inline void cache_alloc_hsw_probe(void)
bool is_mba_sc(struct rdt_resource *r)
{
	if (!r)
		return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
		r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);

	/*
	 * The software controller support is only applicable to MBA resource.
@@ -217,7 +210,7 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	r->membw.max_bw = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
@@ -228,16 +221,12 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r)
			return false;
		r->membw.arch_needs_linear = false;
	}
	r->data_width = 3;

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;

	resctrl_file_fflags_init("thread_throttle_mode",
				 RFTYPE_CTRL_INFO | RFTYPE_RES_MB);

	r->alloc_capable = true;

	return true;
@@ -256,7 +245,7 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)

	cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
	hw_res->num_closid = edx + 1;
	r->default_ctrl = 1 << eax;
	r->membw.max_bw = 1 << eax;

	/* AMD does not use delay */
	r->membw.delay_linear = false;
@@ -269,8 +258,6 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;
	/* Max value is 2048, Data width should be 4 in decimal */
	r->data_width = 4;

	r->alloc_capable = true;

@@ -283,14 +270,13 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx;
	u32 ebx, default_ctrl;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & r->default_ctrl;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & default_ctrl;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
@@ -337,7 +323,7 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return r->default_ctrl;
	return MAX_MBA_BW;
}

static void mba_wrmsr_intel(struct msr_param *m)
@@ -361,36 +347,6 @@ static void cat_wrmsr(struct msr_param *m)
		wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
	struct rdt_ctrl_domain *d;

	lockdep_assert_cpus_held();

	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
			return d;
	}

	return NULL;
}

struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
{
	struct rdt_mon_domain *d;

	lockdep_assert_cpus_held();

	list_for_each_entry(d, &r->mon_domains, hdr.list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
			return d;
	}

	return NULL;
}

u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
{
	return resctrl_to_arch_res(r)->num_closid;
@@ -405,36 +361,6 @@ void rdt_ctrl_update(void *arg)
	hw_res->msr_update(m);
}

/*
 * rdt_find_domain - Search for a domain id in a resource domain list.
 *
 * Search the domain list to find the domain id. If the domain id is
 * found, return the domain. NULL otherwise.  If the domain id is not
 * found (and NULL returned) then the first domain with id bigger than
 * the input id can be returned to the caller via @pos.
 */
struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
				       struct list_head **pos)
{
	struct rdt_domain_hdr *d;
	struct list_head *l;

	list_for_each(l, h) {
		d = list_entry(l, struct rdt_domain_hdr, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	if (pos)
		*pos = l;

	return NULL;
}

static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
@@ -446,7 +372,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
	 * For Memory Allocation: Set b/w requested to 100%
	 */
	for (i = 0; i < hw_res->num_closid; i++, dc++)
		*dc = r->default_ctrl;
		*dc = resctrl_get_default_ctrl(r);
}

static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
@@ -494,13 +420,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
	size_t tsize;

	if (is_mbm_total_enabled()) {
	if (resctrl_arch_is_mbm_total_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_total);
		hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_total)
			return -ENOMEM;
	}
	if (is_mbm_local_enabled()) {
	if (resctrl_arch_is_mbm_local_enabled()) {
		tsize = sizeof(*hw_dom->arch_mbm_local);
		hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
		if (!hw_dom->arch_mbm_local) {
@@ -545,7 +471,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
		return;
	}

	hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
	hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
			return;
@@ -600,7 +526,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
		return;
	}

	hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
	hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
	if (hdr) {
		if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
			return;
@@ -665,7 +591,7 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
		return;
	}

	hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
	hdr = resctrl_find_domain(&r->ctrl_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
@@ -711,7 +637,7 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
		return;
	}

	hdr = rdt_find_domain(&r->mon_domains, id, NULL);
	hdr = resctrl_find_domain(&r->mon_domains, id, NULL);
	if (!hdr) {
		pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
			id, cpu, r->name);
@@ -786,20 +712,6 @@ static int resctrl_arch_offline_cpu(unsigned int cpu)
	return 0;
}

/*
 * Choose a width for the resource name and resource data based on the
 * resource that has widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
	struct rdt_resource *r;

	for_each_alloc_capable_rdt_resource(r) {
		if (r->data_width > max_data_width)
			max_data_width = r->data_width;
	}
}

enum {
	RDT_FLAG_CMT,
	RDT_FLAG_MBM_TOTAL,
@@ -885,6 +797,21 @@ bool __init rdt_cpu_has(int flag)
	return ret;
}

__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt)
{
	if (!rdt_cpu_has(X86_FEATURE_BMEC))
		return false;

	switch (evt) {
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL);
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		return rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL);
	default:
		return false;
	}
}

static __init bool get_mem_config(void)
{
	struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
@@ -963,11 +890,6 @@ static __init bool get_rdt_mon_resources(void)
	if (!rdt_mon_features)
		return false;

	if (is_mbm_local_enabled())
		mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
	else if (is_mbm_total_enabled())
		mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;

	return !rdt_get_mon_l3_config(r);
}

@@ -1086,7 +1008,7 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
	}
}

static int __init resctrl_late_init(void)
static int __init resctrl_arch_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;
@@ -1102,8 +1024,6 @@ static int __init resctrl_late_init(void)
	if (!get_rdt_resources())
		return -ENODEV;

	rdt_init_padding();

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/resctrl/cat:online:",
				  resctrl_arch_online_cpu,
@@ -1111,7 +1031,7 @@ static int __init resctrl_late_init(void)
	if (state < 0)
		return state;

	ret = rdtgroup_init();
	ret = resctrl_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
@@ -1127,18 +1047,13 @@ static int __init resctrl_late_init(void)
	return 0;
}

late_initcall(resctrl_late_init);
late_initcall(resctrl_arch_late_init);

static void __exit resctrl_exit(void)
static void __exit resctrl_arch_exit(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	cpuhp_remove_state(rdt_online);

	rdtgroup_exit();

	if (r->mon_capable)
		rdt_put_mon_l3_config();
	resctrl_exit();
}

__exitcall(resctrl_exit);
__exitcall(resctrl_arch_exit);
Loading