Commit 594902c9 authored by Qinyun Tan, committed by Borislav Petkov (AMD)
Browse files

x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem



In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain
structure representing a NUMA node relies on the cacheinfo interface
(rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map)
for monitoring. The L3 cache information of a SNC NUMA node determines
which domains are summed for the "top level" L3-scoped events.

rdt_mon_domain::ci is initialized using the first online CPU of a NUMA
node. When this CPU goes offline, its shared_cpu_map is cleared to contain
only the offline CPU itself. Subsequently, attempting to read counters
via smp_call_on_cpu(offline_cpu) fails (and the error is ignored), returning
zero values for "top-level events" without any error indication.

Replace the cacheinfo references in struct rdt_mon_domain and struct
rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).

rdt_domain_hdr::cpu_mask contains the online CPUs associated with that
domain. When reading "top-level events", select a CPU from
rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine
valid CPUs for reading the RMID counters via the MSR interface.

Considering all CPUs associated with the L3 cache improves the chances
of picking a housekeeping CPU on which the counter reading work can be
queued, avoiding an unnecessary IPI.

Fixes: 328ea688 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files")
Signed-off-by: Qinyun Tan <qinyuntan@linux.alibaba.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com
parent 9afe6529
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
	struct rdt_hw_mon_domain *hw_dom;
	struct rdt_domain_hdr *hdr;
	struct rdt_mon_domain *d;
	struct cacheinfo *ci;
	int err;

	lockdep_assert_held(&domain_list_lock);
@@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
	d = &hw_dom->d_resctrl;
	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!d->ci) {
	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!ci) {
		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
		mon_domain_free(hw_dom);
		return;
	}
	d->ci_id = ci->id;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

	arch_mon_domain_online(r, d);
+9 −4
Original line number Diff line number Diff line
@@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
	struct rmid_read rr = {0};
	struct rdt_mon_domain *d;
	struct rdtgroup *rdtgrp;
	int domid, cpu, ret = 0;
	struct rdt_resource *r;
	struct cacheinfo *ci;
	struct mon_data *md;
	int domid, ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
@@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
		 * one that matches this cache id.
		 */
		list_for_each_entry(d, &r->mon_domains, hdr.list) {
			if (d->ci->id == domid) {
				rr.ci = d->ci;
			if (d->ci_id == domid) {
				rr.ci_id = d->ci_id;
				cpu = cpumask_any(&d->hdr.cpu_mask);
				ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
				if (!ci)
					continue;
				mon_event_read(&rr, r, NULL, rdtgrp,
					       &d->ci->shared_cpu_map, evtid, false);
					       &ci->shared_cpu_map, evtid, false);
				goto checkresult;
			}
		}
+2 −2
Original line number Diff line number Diff line
@@ -98,7 +98,7 @@ struct mon_data {
 *	   domains in @r sharing L3 @ci_id
 * @evtid: Which monitor event to read.
 * @first: Initialize MBM counter when true.
 * @ci:    Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
 * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
 * @err:   Error encountered when reading counter.
 * @val:   Returned value of event counter. If @rgrp is a parent resource group,
 *	   @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
	struct rdt_mon_domain	*d;
	enum resctrl_event_id	evtid;
	bool			first;
	struct cacheinfo	*ci;
	unsigned int		ci_id;
	int			err;
	u64			val;
	void			*arch_mon_ctx;
+4 −2
Original line number Diff line number Diff line
@@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
	int cpu = smp_processor_id();
	struct rdt_mon_domain *d;
	struct cacheinfo *ci;
	struct mbm_state *m;
	int err, ret;
	u64 tval = 0;
@@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
	}

	/* Summing domains that share a cache, must be on a CPU for that cache. */
	if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
	if (!ci || ci->id != rr->ci_id)
		return -EINVAL;

	/*
@@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
	 */
	ret = -EINVAL;
	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
		if (d->ci->id != rr->ci->id)
		if (d->ci_id != rr->ci_id)
			continue;
		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
					     rr->evtid, &tval, rr->arch_mon_ctx);
+3 −3
Original line number Diff line number Diff line
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
	char name[32];

	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
	if (snc_mode)
		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);

@@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
		return -EPERM;

	list_for_each_entry(mevt, &r->evt_list, list) {
		domid = do_sum ? d->ci->id : d->hdr.id;
		domid = do_sum ? d->ci_id : d->hdr.id;
		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
		if (WARN_ON_ONCE(!priv))
			return -EINVAL;
@@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
	lockdep_assert_held(&rdtgroup_mutex);

	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
	kn = kernfs_find_and_get(parent_kn, name);
	if (kn) {
		/*
Loading