linux-net/mm/damon/stat.c
SeongJae Park 84481e705a mm/damon/stat: monitor all System RAM resources
The DAMON_STAT usage document (Documentation/admin-guide/mm/damon/stat.rst)
says it monitors the system's entire physical memory.  But it actually
monitors only the biggest System RAM resource of the system.  When there
are multiple System RAM resources, this results in monitoring only an
unexpectedly small fraction of the physical memory.  For example, suppose
the physical address space holds, in order, a 500 GiB System RAM resource,
a 10 MiB non-System RAM resource, and another 500 GiB System RAM resource.
DAMON_STAT will monitor only the first 500 GiB System RAM resource.  This
situation is particularly common on NUMA systems.

To fix this issue and make DAMON_STAT work as documented, select a
physical address range that covers all System RAM resources of the system.
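
With the example layout above, the selected range becomes, roughly:

    range.start = start of the first System RAM resource   =  0
    range.end   = end of the last System RAM resource + 1 ~=  1000 GiB + 10 MiB

Both 500 GiB System RAM resources hence fall inside the single monitoring
target region (the 10 MiB non-System RAM resource sits inside the range
but contains no System RAM to monitor), whereas the old behavior covered
only the first 500 GiB.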

[sj@kernel.org: return error if monitoring target region is invalid]
  Link: https://lkml.kernel.org/r/20260317053631.87907-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20260316235118.873-1-sj@kernel.org
Fixes: 369c415e60 ("mm/damon: introduce DAMON_STAT module")
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org>	[6.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2026-03-23 09:35:05 -07:00

// SPDX-License-Identifier: GPL-2.0
/*
 * Shows data access monitoring results in simple metrics.
 */

#define pr_fmt(fmt) "damon-stat: " fmt

#include <linux/damon.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "damon_stat."

static int damon_stat_enabled_store(
		const char *val, const struct kernel_param *kp);

static const struct kernel_param_ops enabled_param_ops = {
	.set = damon_stat_enabled_store,
	.get = param_get_bool,
};

static bool enabled __read_mostly = IS_ENABLED(
	CONFIG_DAMON_STAT_ENABLED_DEFAULT);
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
MODULE_PARM_DESC(enabled, "Enable or disable DAMON_STAT");

static unsigned long estimated_memory_bandwidth __read_mostly;
module_param(estimated_memory_bandwidth, ulong, 0400);
MODULE_PARM_DESC(estimated_memory_bandwidth,
		"Estimated memory bandwidth usage in bytes per second");

static long memory_idle_ms_percentiles[101] = {0,};
module_param_array(memory_idle_ms_percentiles, long, NULL, 0400);
MODULE_PARM_DESC(memory_idle_ms_percentiles,
		"Memory idle time percentiles in milliseconds");

static unsigned long aggr_interval_us;
module_param(aggr_interval_us, ulong, 0400);
MODULE_PARM_DESC(aggr_interval_us,
		"Current tuned aggregation interval in microseconds");

static struct damon_ctx *damon_stat_context;

static unsigned long damon_stat_last_refresh_jiffies;
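
/*
 * Estimate memory bandwidth consumption: the sum over all monitored regions
 * of the region size times its nr_accesses, normalized from the
 * aggregation interval (in microseconds) to one second.
 */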
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long access_bytes = 0;

	damon_for_each_target(t, c) {
		damon_for_each_region(r, t)
			access_bytes += (r->ar.end - r->ar.start) *
					r->nr_accesses;
	}
	estimated_memory_bandwidth = access_bytes * USEC_PER_MSEC *
		MSEC_PER_SEC / c->attrs.aggr_interval;
}
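
/*
 * Map a region to a signed idle time in aggregation intervals: negative for
 * regions being accessed (accessed for longer means more negative), and
 * positive for idle regions (idle for longer means larger).  Sorting by
 * this value orders regions from hottest to coldest.
 */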
static int damon_stat_idletime(const struct damon_region *r)
{
	if (r->nr_accesses)
		return -1 * (r->age + 1);
	return r->age + 1;
}

static int damon_stat_cmp_regions(const void *a, const void *b)
{
	const struct damon_region *ra = *(const struct damon_region **)a;
	const struct damon_region *rb = *(const struct damon_region **)b;

	return damon_stat_idletime(ra) - damon_stat_idletime(rb);
}
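
/*
 * Build an array of pointers to all monitored regions, sorted from hottest
 * to coldest, together with the number of regions and their total size in
 * bytes.  The caller is responsible for kfree()-ing *sorted_ptr on success.
 */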
static int damon_stat_sort_regions(struct damon_ctx *c,
		struct damon_region ***sorted_ptr, int *nr_regions_ptr,
		unsigned long *total_sz_ptr)
{
	struct damon_target *t;
	struct damon_region *r;
	struct damon_region **region_pointers;
	unsigned int nr_regions = 0;
	unsigned long total_sz = 0;

	damon_for_each_target(t, c) {
		/* there is only one target */
		region_pointers = kmalloc_objs(*region_pointers,
				damon_nr_regions(t));
		if (!region_pointers)
			return -ENOMEM;
		damon_for_each_region(r, t) {
			region_pointers[nr_regions++] = r;
			total_sz += r->ar.end - r->ar.start;
		}
	}
	sort(region_pointers, nr_regions, sizeof(*region_pointers),
			damon_stat_cmp_regions, NULL);
	*sorted_ptr = region_pointers;
	*nr_regions_ptr = nr_regions;
	*total_sz_ptr = total_sz;
	return 0;
}
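
/*
 * Fill memory_idle_ms_percentiles[]: for each percentile p in [0, 100],
 * store the idle time in milliseconds (negative if being accessed) such
 * that p percent of the monitored bytes have an idle time not larger than
 * it, by walking the regions from hottest to coldest while accumulating
 * their sizes.
 */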
static void damon_stat_set_idletime_percentiles(struct damon_ctx *c)
{
	struct damon_region **sorted_regions, *region;
	int nr_regions;
	unsigned long total_sz, accounted_bytes = 0;
	int err, i, next_percentile = 0;

	err = damon_stat_sort_regions(c, &sorted_regions, &nr_regions,
			&total_sz);
	if (err)
		return;
	for (i = 0; i < nr_regions; i++) {
		region = sorted_regions[i];
		accounted_bytes += region->ar.end - region->ar.start;
		while (next_percentile <= accounted_bytes * 100 / total_sz)
			memory_idle_ms_percentiles[next_percentile++] =
				damon_stat_idletime(region) *
				(long)c->attrs.aggr_interval / USEC_PER_MSEC;
	}
	kfree(sorted_regions);
}
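
/*
 * Refresh the exposed statistics.  Repeatedly invoked from the monitoring
 * thread via damon_call(), but rate-limited to once per five seconds.
 */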
static int damon_stat_damon_call_fn(void *data)
{
	struct damon_ctx *c = data;

	/* avoid unnecessarily frequent stat update */
	if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
				msecs_to_jiffies(5 * MSEC_PER_SEC)))
		return 0;
	damon_stat_last_refresh_jiffies = jiffies;

	aggr_interval_us = c->attrs.aggr_interval;
	damon_stat_set_estimated_memory_bandwidth(c);
	damon_stat_set_idletime_percentiles(c);
	return 0;
}
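
/*
 * Argument for walk_system_ram_res(): records the start of the first and
 * the end of the last walked System RAM resource.  Since the walk visits
 * resources in ascending physical address order, the resulting range
 * covers every System RAM resource of the system.
 */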
struct damon_stat_system_ram_range_walk_arg {
	bool walked;
	struct resource res;
};

static int damon_stat_system_ram_walk_fn(struct resource *res, void *arg)
{
	struct damon_stat_system_ram_range_walk_arg *a = arg;

	if (!a->walked) {
		a->walked = true;
		a->res.start = res->start;
	}
	a->res.end = res->end;
	return 0;
}

static unsigned long damon_stat_res_to_core_addr(resource_size_t ra,
		unsigned long addr_unit)
{
	/*
	 * Use div_u64() to avoid linking errors related to __udivdi3,
	 * __aeabi_uldivmod, or similar symbols.  It can also perform better
	 * (read the div_u64() comment for details).
	 */
	if (sizeof(ra) == 8 && sizeof(addr_unit) == 4)
		return div_u64(ra, addr_unit);
	return ra / addr_unit;
}
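
/*
 * Set the monitoring target region to a single range covering from the
 * start of the first to the end of the last System RAM resource, so that
 * all System RAM is monitored as documented.  Returns -EINVAL if no System
 * RAM resource was found, or if the range becomes invalid after conversion
 * to the core layer's address unit.
 */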
static int damon_stat_set_monitoring_region(struct damon_target *t,
		unsigned long addr_unit, unsigned long min_region_sz)
{
	struct damon_addr_range addr_range;
	struct damon_stat_system_ram_range_walk_arg arg = {};

	walk_system_ram_res(0, -1, &arg, damon_stat_system_ram_walk_fn);
	if (!arg.walked)
		return -EINVAL;
	addr_range.start = damon_stat_res_to_core_addr(
			arg.res.start, addr_unit);
	addr_range.end = damon_stat_res_to_core_addr(
			arg.res.end + 1, addr_unit);
	if (addr_range.end <= addr_range.start)
		return -EINVAL;
	return damon_set_regions(t, &addr_range, 1, min_region_sz);
}
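
/*
 * Build a DAMON context for physical address space monitoring, with
 * self-tuned intervals and a single target region covering all System RAM.
 */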
static struct damon_ctx *damon_stat_build_ctx(void)
{
	struct damon_ctx *ctx;
	struct damon_attrs attrs;
	struct damon_target *target;

	ctx = damon_new_ctx();
	if (!ctx)
		return NULL;
	attrs = (struct damon_attrs) {
		.sample_interval = 5 * USEC_PER_MSEC,
		.aggr_interval = 100 * USEC_PER_MSEC,
		.ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC,
		.min_nr_regions = 10,
		.max_nr_regions = 1000,
	};
	/*
	 * auto-tune sampling and aggregation interval aiming 4% DAMON-observed
	 * accesses ratio, keeping sampling interval in [5ms, 10s] range.
	 */
	attrs.intervals_goal = (struct damon_intervals_goal) {
		.access_bp = 400, .aggrs = 3,
		.min_sample_us = 5000, .max_sample_us = 10000000,
	};
	if (damon_set_attrs(ctx, &attrs))
		goto free_out;

	if (damon_select_ops(ctx, DAMON_OPS_PADDR))
		goto free_out;

	target = damon_new_target();
	if (!target)
		goto free_out;
	damon_add_target(ctx, target);
	if (damon_stat_set_monitoring_region(target, ctx->addr_unit,
				ctx->min_region_sz))
		goto free_out;
	return ctx;
free_out:
	damon_destroy_ctx(ctx);
	return NULL;
}
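
/*
 * Control for asking DAMON to repeatedly invoke damon_stat_damon_call_fn()
 * from the monitoring thread while monitoring is running.
 */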
static struct damon_call_control call_control = {
	.fn = damon_stat_damon_call_fn,
	.repeat = true,
};

static int damon_stat_start(void)
{
	int err;

	damon_stat_context = damon_stat_build_ctx();
	if (!damon_stat_context)
		return -ENOMEM;
	err = damon_start(&damon_stat_context, 1, true);
	if (err)
		return err;
	damon_stat_last_refresh_jiffies = jiffies;
	call_control.data = damon_stat_context;
	return damon_call(damon_stat_context, &call_control);
}

static void damon_stat_stop(void)
{
	damon_stop(&damon_stat_context, 1);
	damon_destroy_ctx(damon_stat_context);
}
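
/*
 * 'enabled' parameter setter.  Starts or stops monitoring on value
 * transitions.  If DAMON is not yet initialized, only the value is stored;
 * damon_stat_init() applies it later.
 */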
static int damon_stat_enabled_store(
		const char *val, const struct kernel_param *kp)
{
	bool is_enabled = enabled;
	int err;

	err = kstrtobool(val, &enabled);
	if (err)
		return err;

	if (is_enabled == enabled)
		return 0;

	if (!damon_initialized())
		/*
		 * probably called from command line parsing (parse_args()).
		 * Cannot call damon_new_ctx().  Let damon_stat_init() handle.
		 */
		return 0;

	if (enabled) {
		err = damon_stat_start();
		if (err)
			enabled = false;
		return err;
	}
	damon_stat_stop();
	return 0;
}

static int __init damon_stat_init(void)
{
	int err = 0;

	if (!damon_initialized()) {
		err = -ENOMEM;
		goto out;
	}

	/* probably set via command line */
	if (enabled)
		err = damon_stat_start();

out:
	if (err && enabled)
		enabled = false;
	return err;
}
module_init(damon_stat_init);