Commit c51bb53d authored by David Yat Sin, committed by Alex Deucher
Browse files

drm/amdkfd: Add metadata ring buffer for compute



Add support for a separate ring-buffer for metadata packets when using
compute queues. The userspace application allocates the metadata ring-buffer
and the queue ring-buffer with a single allocation. The metadata
ring-buffer starts after the queue ring-buffer.

Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent d0c989a0
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -221,6 +221,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) {
		pr_err("Metadata ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
@@ -255,6 +260,9 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->metadata_queue_size = args->metadata_ring_size;

	q_properties->read_ptr = (void __user *)args->read_pointer_address;
	q_properties->write_ptr = (void __user *)args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
+21 −0
Original line number Diff line number Diff line
@@ -266,6 +266,27 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);

	if (q->metadata_queue_size) {
		/*
		 * On GC 12.1 a metadata packet is 64 DWs, which is 4 times the
		 * size of an AQL packet.
		 */
		if (q->metadata_queue_size == q->queue_size * 4) {
			/*
			 * User application allocates main queue ring and metadata queue ring
			 * with a single allocation. metadata queue ring starts after main
			 * queue ring.
			 */
			m->cp_hqd_kd_base =
				lower_32_bits((q->queue_address + q->queue_size) >> 8);
			m->cp_hqd_kd_base_hi =
				upper_32_bits((q->queue_address + q->queue_size) >> 8);

			m->cp_hqd_kd_cntl |= CP_HQD_KD_CNTL__KD_FETCHER_ENABLE_MASK;
			/* KD_SIZE = 2 for metadata packet = 64 DWs */
			m->cp_hqd_kd_cntl |= 2 << CP_HQD_KD_CNTL__KD_SIZE__SHIFT;
		} else {
			pr_warn("Invalid metadata ring size, metadata queue will be ignored\n");
		}
	}

	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+2 −1
Original line number Diff line number Diff line
@@ -507,6 +507,7 @@ struct queue_properties {
	unsigned int queue_id;
	uint64_t queue_address;
	uint64_t queue_size;
	uint64_t metadata_queue_size;
	uint32_t priority;
	uint32_t queue_percent;
	void __user *read_ptr;
+5 −2
Original line number Diff line number Diff line
@@ -247,9 +247,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
	    properties->format == KFD_QUEUE_FORMAT_AQL &&
	    topo_dev->node_props.gfx_target_version >= 70000 &&
	    topo_dev->node_props.gfx_target_version < 90000)
		expected_queue_size = properties->queue_size / 2;
		/* metadata_queue_size not supported on GFX7/GFX8 */
		expected_queue_size =
			properties->queue_size / 2;
	else
		expected_queue_size = properties->queue_size;
		expected_queue_size =
			properties->queue_size + properties->metadata_queue_size;

	vm = drm_priv_to_vm(pdd->drm_priv);
	err = amdgpu_bo_reserve(vm->root.bo, false);
+3 −2
Original line number Diff line number Diff line
@@ -47,9 +47,10 @@
 * - 1.19 - Add a new ioctl to create secondary kfd processes
 * - 1.20 - Trap handler support for expert scheduling mode available
 * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions
 * - 1.22 - Add queue creation with metadata ring base address
 */
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 21
#define KFD_IOCTL_MINOR_VERSION 22

struct kfd_ioctl_get_version_args {
	__u32 major_version;	/* from KFD */
@@ -87,7 +88,7 @@ struct kfd_ioctl_create_queue_args {
	__u32 ctx_save_restore_size;	/* to KFD */
	__u32 ctl_stack_size;		/* to KFD */
	__u32 sdma_engine_id;		/* to KFD */
	__u32 pad;
	__u32 metadata_ring_size;	/* to KFD */
};

struct kfd_ioctl_destroy_queue_args {