Unverified Commit 209e8d26 authored by Maíra Canal's avatar Maíra Canal
Browse files

drm/v3d: Create a CPU job extension for the copy performance query job



A CPU job is a type of job that performs operations that requires CPU
intervention. A copy performance query job is a job that copy the complete
or partial result of a query to a buffer. In order to copy the result of
a performance query to a buffer, we need to get the values from the
performance monitors.

So, create a user extension for the CPU job that enables the creation
of a copy performance query job. This user extension will allow the creation
of a CPU job that copy the results of a performance query to a BO with the
possibility to indicate the availability with a availability bit.

Signed-off-by: default avatarMaíra Canal <mcanal@igalia.com>
Reviewed-by: default avatarIago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231130164420.932823-19-mcanal@igalia.com
parent bae7cb5d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -322,6 +322,7 @@ enum v3d_cpu_job_type {
	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
	V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY,
};

struct v3d_timestamp_query {
+65 −0
Original line number Diff line number Diff line
@@ -452,12 +452,77 @@ v3d_reset_performance_queries(struct v3d_cpu_job *job)
	}
}

static void
v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query)
{
	struct v3d_performance_query_info *performance_query = &job->performance_query;
	struct v3d_copy_query_results_info *copy = &job->copy;
	struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
	struct v3d_dev *v3d = job->base.v3d;
	struct v3d_perfmon *perfmon;
	u64 counter_values[V3D_PERFCNT_NUM];

	for (int i = 0; i < performance_query->nperfmons; i++) {
		perfmon = v3d_perfmon_find(v3d_priv,
					   performance_query->queries[query].kperfmon_ids[i]);
		if (!perfmon) {
			DRM_DEBUG("Failed to find perfmon.");
			continue;
		}

		v3d_perfmon_stop(v3d, perfmon, true);

		memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values,
		       perfmon->ncounters * sizeof(u64));

		v3d_perfmon_put(perfmon);
	}

	for (int i = 0; i < performance_query->ncounters; i++)
		write_to_buffer(data, i, copy->do_64bit, counter_values[i]);
}

static void
v3d_copy_performance_query(struct v3d_cpu_job *job)
{
	struct v3d_performance_query_info *performance_query = &job->performance_query;
	struct v3d_copy_query_results_info *copy = &job->copy;
	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
	struct dma_fence *fence;
	bool available, write_result;
	u8 *data;

	v3d_get_bo_vaddr(bo);

	data = ((u8 *)bo->vaddr) + copy->offset;

	for (int i = 0; i < performance_query->count; i++) {
		fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
		available = fence ? dma_fence_is_signaled(fence) : false;

		write_result = available || copy->do_partial;
		if (write_result)
			v3d_write_performance_query_result(job, data, i);

		if (copy->availability_bit)
			write_to_buffer(data, performance_query->ncounters,
					copy->do_64bit, available ? 1u : 0u);

		data += copy->stride;

		dma_fence_put(fence);
	}

	v3d_put_bo_vaddr(bo);
}

static const v3d_cpu_job_fn cpu_job_function[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query,
};

static struct dma_fence *
+82 −0
Original line number Diff line number Diff line
@@ -672,6 +672,84 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
	return 0;
}

static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_copy_performance_query copy;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(copy.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(copy.syncs);
	kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);

	for (int i = 0; i < copy.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (int j = 0; j < copy.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				kvfree(job->performance_query.queries);
				return -EFAULT;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}
	}
	job->performance_query.count = copy.count;
	job->performance_query.nperfmons = copy.nperfmons;
	job->performance_query.ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}

/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
 * according to the extension id (name).
 */
@@ -712,6 +790,9 @@ v3d_get_extensions(struct drm_file *file_priv,
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
@@ -1092,6 +1173,7 @@ static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};

/**
+50 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ struct drm_v3d_extension {
#define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY	0x05
#define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY	0x06
#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY	0x07
	__u32 flags; /* mbz */
};

@@ -519,6 +520,52 @@ struct drm_v3d_reset_performance_query {
	__u64 kperfmon_ids;
};

/**
 * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to copy
 * performance query results to a buffer
 *
 * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it
 * points to this extension to define a copy performance query submission. This
 * CPU job will copy the performance queries results to a BO with the offset
 * and stride defined in the extension.
 */
struct drm_v3d_copy_performance_query {
	struct drm_v3d_extension base;

	/* Define if should write to buffer using 64 or 32 bits */
	__u8 do_64bit;

	/* Define if it can write to buffer even if the query is not available */
	__u8 do_partial;

	/* Define if it should write availability bit to buffer */
	__u8 availability_bit;

	/* mbz */
	__u8 pad;

	/* Offset of the buffer in the BO */
	__u32 offset;

	/* Stride of the buffer in the BO */
	__u32 stride;

	/* Number of performance monitors */
	__u32 nperfmons;

	/* Number of performance counters related to this query pool */
	__u32 ncounters;

	/* Number of queries */
	__u32 count;

	/* Array of performance queries's syncobjs to indicate its availability */
	__u64 syncs;

	/* Array of u64 user-pointers that point to an array of kperfmon_ids */
	__u64 kperfmon_ids;
};

struct drm_v3d_submit_cpu {
	/* Pointer to a u32 array of the BOs that are referenced by the job.
	 *
@@ -537,6 +584,9 @@ struct drm_v3d_submit_cpu {
	 *
	 * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no
	 * BOs.
	 *
	 * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one
	 * BO, where the performance queries will be written.
	 */
	__u64 bo_handles;