mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-21 04:53:46 -04:00
drm/xe: Correlate engine and cpu timestamps with better accuracy
Perf measurements rely on CPU and engine timestamps to correlate events of interest across these time domains. Current mechanisms get these timestamps separately and the calculated delta between these timestamps lacks enough accuracy. To improve the accuracy of these time measurements to within a few us, add a query that returns the engine and cpu timestamps captured as close to each other as possible. Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24591 v2: - Fix kernel-doc warnings (CI) - Document input params and group them together (Jose) - s/cs/engine/ (Jose) - Remove padding in the query (Ashutosh) Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Signed-off-by: Francois Dugast <francois.dugast@intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> [Rodrigo finished the s/cs/engine renaming]
This commit is contained in:
committed by
Rodrigo Vivi
parent
61d63a59f6
commit
7793d00d1b
@@ -6,10 +6,12 @@
|
||||
#include "xe_query.h"
|
||||
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
#include <drm/ttm/ttm_placement.h>
|
||||
#include <drm/xe_drm.h>
|
||||
|
||||
#include "regs/xe_engine_regs.h"
|
||||
#include "xe_bo.h"
|
||||
#include "xe_device.h"
|
||||
#include "xe_exec_queue.h"
|
||||
@@ -17,6 +19,7 @@
|
||||
#include "xe_gt.h"
|
||||
#include "xe_guc_hwconfig.h"
|
||||
#include "xe_macros.h"
|
||||
#include "xe_mmio.h"
|
||||
#include "xe_ttm_vram_mgr.h"
|
||||
|
||||
static const u16 xe_to_user_engine_class[] = {
|
||||
@@ -27,6 +30,14 @@ static const u16 xe_to_user_engine_class[] = {
|
||||
[XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
|
||||
};
|
||||
|
||||
static const enum xe_engine_class user_to_xe_engine_class[] = {
|
||||
[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
|
||||
[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
|
||||
[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
|
||||
[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
|
||||
[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
|
||||
};
|
||||
|
||||
static size_t calc_hw_engine_info_size(struct xe_device *xe)
|
||||
{
|
||||
struct xe_hw_engine *hwe;
|
||||
@@ -45,6 +56,132 @@ static size_t calc_hw_engine_info_size(struct xe_device *xe)
|
||||
return i * sizeof(struct drm_xe_engine_class_instance);
|
||||
}
|
||||
|
||||
/*
 * Pointer to a CPU clock accessor that takes no arguments and returns a u64
 * timestamp; the ktime_get_*_ns() helpers chosen by __clock_id_to_func()
 * are handed around through this type.
 */
typedef u64 (*__ktime_func_t)(void);
|
||||
/*
 * Pick the CPU clock accessor matching a user-supplied clock id.
 *
 * Returns the ktime_get_*_ns() helper for @clk_id, or NULL when the clock id
 * is not one of the supported reference clocks.
 */
static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
{
	__ktime_func_t accessor = NULL;

	/*
	 * The selectable reference clocks follow the same logic as the perf
	 * subsystem, so timestamps can be taken against the user's choice.
	 */
	switch (clk_id) {
	case CLOCK_MONOTONIC:
		accessor = ktime_get_ns;
		break;
	case CLOCK_MONOTONIC_RAW:
		accessor = ktime_get_raw_ns;
		break;
	case CLOCK_REALTIME:
		accessor = ktime_get_real_ns;
		break;
	case CLOCK_BOOTTIME:
		accessor = ktime_get_boottime_ns;
		break;
	case CLOCK_TAI:
		accessor = ktime_get_clocktai_ns;
		break;
	default:
		/* Unsupported clock id: leave accessor NULL */
		break;
	}

	return accessor;
}
|
||||
|
||||
static void
|
||||
__read_timestamps(struct xe_gt *gt,
|
||||
struct xe_reg lower_reg,
|
||||
struct xe_reg upper_reg,
|
||||
u64 *engine_ts,
|
||||
u64 *cpu_ts,
|
||||
u64 *cpu_delta,
|
||||
__ktime_func_t cpu_clock)
|
||||
{
|
||||
u32 upper, lower, old_upper, loop = 0;
|
||||
|
||||
upper = xe_mmio_read32(gt, upper_reg);
|
||||
do {
|
||||
*cpu_delta = local_clock();
|
||||
*cpu_ts = cpu_clock();
|
||||
lower = xe_mmio_read32(gt, lower_reg);
|
||||
*cpu_delta = local_clock() - *cpu_delta;
|
||||
old_upper = upper;
|
||||
upper = xe_mmio_read32(gt, upper_reg);
|
||||
} while (upper != old_upper && loop++ < 2);
|
||||
|
||||
*engine_ts = (u64)upper << 32 | lower;
|
||||
}
|
||||
|
||||
static int
|
||||
query_engine_cycles(struct xe_device *xe,
|
||||
struct drm_xe_device_query *query)
|
||||
{
|
||||
struct drm_xe_query_engine_cycles __user *query_ptr;
|
||||
struct drm_xe_engine_class_instance *eci;
|
||||
struct drm_xe_query_engine_cycles resp;
|
||||
size_t size = sizeof(resp);
|
||||
__ktime_func_t cpu_clock;
|
||||
struct xe_hw_engine *hwe;
|
||||
struct xe_gt *gt;
|
||||
|
||||
if (query->size == 0) {
|
||||
query->size = size;
|
||||
return 0;
|
||||
} else if (XE_IOCTL_DBG(xe, query->size != size)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
query_ptr = u64_to_user_ptr(query->data);
|
||||
if (copy_from_user(&resp, query_ptr, size))
|
||||
return -EFAULT;
|
||||
|
||||
cpu_clock = __clock_id_to_func(resp.clockid);
|
||||
if (!cpu_clock)
|
||||
return -EINVAL;
|
||||
|
||||
eci = &resp.eci;
|
||||
if (eci->gt_id > XE_MAX_GT_PER_TILE)
|
||||
return -EINVAL;
|
||||
|
||||
gt = xe_device_get_gt(xe, eci->gt_id);
|
||||
if (!gt)
|
||||
return -EINVAL;
|
||||
|
||||
if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
|
||||
return -EINVAL;
|
||||
|
||||
hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
|
||||
eci->engine_instance, true);
|
||||
if (!hwe)
|
||||
return -EINVAL;
|
||||
|
||||
resp.engine_frequency = gt->info.clock_freq;
|
||||
|
||||
xe_device_mem_access_get(xe);
|
||||
xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
|
||||
|
||||
__read_timestamps(gt,
|
||||
RING_TIMESTAMP(hwe->mmio_base),
|
||||
RING_TIMESTAMP_UDW(hwe->mmio_base),
|
||||
&resp.engine_cycles,
|
||||
&resp.cpu_timestamp,
|
||||
&resp.cpu_delta,
|
||||
cpu_clock);
|
||||
|
||||
xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
|
||||
xe_device_mem_access_put(xe);
|
||||
resp.width = 36;
|
||||
|
||||
/* Only write to the output fields of user query */
|
||||
if (put_user(resp.engine_frequency, &query_ptr->engine_frequency))
|
||||
return -EFAULT;
|
||||
|
||||
if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
|
||||
return -EFAULT;
|
||||
|
||||
if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
|
||||
return -EFAULT;
|
||||
|
||||
if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
|
||||
return -EFAULT;
|
||||
|
||||
if (put_user(resp.width, &query_ptr->width))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int query_engines(struct xe_device *xe,
|
||||
struct drm_xe_device_query *query)
|
||||
{
|
||||
@@ -369,6 +506,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
|
||||
query_gts,
|
||||
query_hwconfig,
|
||||
query_gt_topology,
|
||||
query_engine_cycles,
|
||||
};
|
||||
|
||||
int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
|
||||
|
||||
Reference in New Issue
Block a user