Commit e568dc3e authored by Lizhi Hou
Browse files

accel/amdxdna: Add IOCTL parameter for telemetry data



Extend DRM_IOCTL_AMDXDNA_GET_INFO to include additional parameters
that allow collection of telemetry data.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251104062546.833771-3-lizhi.hou@amd.com
parent 1556c170
Loading
Loading
Loading
Loading
+50 −6
Original line number Diff line number Diff line
@@ -47,7 +47,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
		ndev->mgmt_chann = NULL;
	}

	if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
	if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
			 msg->opcode, *hdl->data);
		ret = -EINVAL;
@@ -336,11 +336,6 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
		goto fail;
	}

	if (resp.status != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
		ret = -EINVAL;
		goto fail;
	}
	XDNA_DBG(xdna, "Query NPU status completed");

	if (size < resp.size) {
@@ -362,6 +357,55 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
	return ret;
}

/*
 * aie2_query_telemetry() - fetch telemetry data and hand it to user space
 * @ndev:   device handle the management message is sent on
 * @buf:    user-space destination for the telemetry payload
 * @size:   capacity of @buf in bytes; also the size of the bounce buffer
 * @header: in: ->type selects the telemetry type;
 *          out: ->major / ->minor are taken from the response on success
 *
 * A temporary DMA buffer of @size bytes is allocated, its address and size
 * are placed in a MSG_OP_GET_TELEMETRY request, and the request is sent with
 * aie2_send_mgmt_msg_wait(). The first resp.size bytes of the buffer are then
 * copied to @buf.
 *
 * Return: 0 on success, -EINVAL if ->type is not below MAX_TELEMETRY_TYPE or
 * the response reports more data than @size, -ENOMEM if the DMA buffer cannot
 * be allocated, -EFAULT if the copy to user space fails, or the error returned
 * by aie2_send_mgmt_msg_wait().
 */
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header)
{
	DECLARE_AIE2_MSG(get_telemetry, MSG_OP_GET_TELEMETRY);
	struct amdxdna_dev *xdna = ndev->xdna;
	dma_addr_t bus_addr;
	u8 *kbuf;
	int ret;

	/* Reject unknown telemetry types before allocating anything. */
	if (header->type >= MAX_TELEMETRY_TYPE)
		return -EINVAL;

	kbuf = dma_alloc_noncoherent(xdna->ddev.dev, size, &bus_addr,
				     DMA_FROM_DEVICE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	req.type = header->type;
	req.buf_size = size;
	req.buf_addr = bus_addr;

	/* Flush CPU caches for the buffer before the device is given access. */
	drm_clflush_virt_range(kbuf, size);

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Query telemetry failed, status %d", ret);
	} else if (resp.size > size) {
		/* Response claims more data than the buffer can hold. */
		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
		ret = -EINVAL;
	} else if (copy_to_user(buf, kbuf, resp.size)) {
		XDNA_ERR(xdna, "Failed to copy telemetry to user space");
		ret = -EFAULT;
	} else {
		/* Payload delivered; report the version taken from the response. */
		header->major = resp.major;
		header->minor = resp.minor;
	}

	/* The bounce buffer is released on every path past the allocation. */
	dma_free_noncoherent(xdna->ddev.dev, size, kbuf, bus_addr, DMA_FROM_DEVICE);
	return ret;
}

int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t))
{
+24 −1
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
enum aie2_msg_opcode {
	MSG_OP_CREATE_CONTEXT              = 0x2,
	MSG_OP_DESTROY_CONTEXT             = 0x3,
	MSG_OP_GET_TELEMETRY               = 0x4,
	MSG_OP_SYNC_BO                     = 0x7,
	MSG_OP_EXECUTE_BUFFER_CF           = 0xC,
	MSG_OP_QUERY_COL_STATUS            = 0xD,
@@ -137,6 +138,28 @@ struct destroy_ctx_resp {
	enum aie2_msg_status	status;
} __packed;

/*
 * Telemetry type selector carried in get_telemetry_req.type.
 *
 * Values are spelled out explicitly so the numeric encoding is visible at a
 * glance. MAX_TELEMETRY_TYPE is an exclusive upper bound, not a real type:
 * aie2_query_telemetry() rejects any type >= MAX_TELEMETRY_TYPE.
 */
enum telemetry_type {
	TELEMETRY_TYPE_DISABLED		= 0,
	TELEMETRY_TYPE_HEALTH		= 1,
	TELEMETRY_TYPE_ERROR_INFO	= 2,
	TELEMETRY_TYPE_PROFILING	= 3,
	TELEMETRY_TYPE_DEBUG		= 4,
	MAX_TELEMETRY_TYPE		= 5
};

/*
 * Request payload for MSG_OP_GET_TELEMETRY, filled in by
 * aie2_query_telemetry().
 *
 * The struct is packed; NOTE(review): presumably the layout is dictated by
 * the firmware interface, so do not reorder or resize fields without
 * confirming against it.
 */
struct get_telemetry_req {
	/* Telemetry type requested; the caller passes header->type. */
	enum telemetry_type	type;
	/* DMA address of the buffer that receives the telemetry data. */
	__u64	buf_addr;
	/* Size in bytes of the buffer at buf_addr. */
	__u32	buf_size;
} __packed;

/*
 * Response payload for MSG_OP_GET_TELEMETRY, consumed by
 * aie2_query_telemetry().
 *
 * Unlike responses where the status is the first word, status is the last
 * member here; the message helpers locate it through the separate
 * xdna_notify.status pointer rather than assuming it is at offset 0.
 */
struct get_telemetry_resp {
	/* Copied to the user-visible telemetry header as ->major. */
	__u32	major;
	/* Copied to the user-visible telemetry header as ->minor. */
	__u32	minor;
	/*
	 * Number of bytes copied to user space; the request fails with
	 * -EINVAL if this exceeds the size of the supplied buffer.
	 */
	__u32	size;
	/* Completion status; anything but AIE2_STATUS_SUCCESS is an error. */
	enum aie2_msg_status	status;
} __packed;

struct execute_buffer_req {
	__u32	cu_idx;
	__u32	payload[19];
+73 −0
Original line number Diff line number Diff line
@@ -862,6 +862,76 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
	return 0;
}

/*
 * amdxdna_hwctx_walk() callback used while building the telemetry header.
 *
 * @hwctx: hardware context being visited
 * @arg:   u32 table indexed by firmware context id; the bounds check below
 *         implies it must have at least priv->hwctx_limit entries
 *
 * Stores hwctx->id in the slot selected by hwctx->fw_ctx_id.
 *
 * Return: 0 on success, -EINVAL if fw_ctx_id is not below hwctx_limit.
 */
static int aie2_fill_hwctx_map(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	u32 *id_table = arg;

	if (hwctx->fw_ctx_id < xdna->dev_handle->priv->hwctx_limit) {
		id_table[hwctx->fw_ctx_id] = hwctx->id;
		return 0;
	}

	/* A firmware context id outside the table would write out of bounds. */
	XDNA_ERR(xdna, "Invalid fw ctx id %d/%d ", hwctx->fw_ctx_id,
		 xdna->dev_handle->priv->hwctx_limit);
	return -EINVAL;
}

/*
 * aie2_get_telemetry() - serve DRM_AMDXDNA_QUERY_TELEMETRY
 * @client: requesting client; only client->xdna is used
 * @args:   get-info arguments; ->buffer and ->buffer_size describe the
 *          user buffer
 *
 * The user buffer is treated as a telemetry header (including a map with
 * hwctx_limit entries) immediately followed by space for the telemetry data:
 *
 *   1. the fixed part of the header is read from user space (it carries the
 *      requested type),
 *   2. the map is filled from every client's hardware contexts,
 *   3. aie2_query_telemetry() writes the data area and the major/minor
 *      version fields,
 *   4. the completed header is written back to the start of the buffer.
 *
 * The data area must be non-empty and at most SZ_4M bytes.
 *
 * NOTE(review): client_list is walked without taking a lock here; presumably
 * the caller already holds the lock protecting it - confirm.
 *
 * Return: 0 on success, -EINVAL for a bad buffer size, -ENOMEM if the header
 * cannot be allocated, -EFAULT if a user copy fails, or the error returned by
 * amdxdna_hwctx_walk() / aie2_query_telemetry().
 */
static int aie2_get_telemetry(struct amdxdna_client *client,
			      struct amdxdna_drm_get_info *args)
{
	struct amdxdna_drm_query_telemetry_header *header __free(kfree) = NULL;
	u32 telemetry_data_sz, header_sz, elem_num;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_client *tmp_client;
	int ret;

	elem_num = xdna->dev_handle->priv->hwctx_limit;
	header_sz = struct_size(header, map, elem_num);
	/* "<=" because at least one byte of telemetry data must fit. */
	if (args->buffer_size <= header_sz) {
		XDNA_ERR(xdna, "Invalid buffer size");
		return -EINVAL;
	}

	telemetry_data_sz = args->buffer_size - header_sz;
	if (telemetry_data_sz > SZ_4M) {
		/* u32 value: print unsigned so huge sizes do not show up negative. */
		XDNA_ERR(xdna, "Buffer size is too big, %u", telemetry_data_sz);
		return -EINVAL;
	}

	/* Zeroed so map slots of unused firmware context ids read back as 0. */
	header = kzalloc(header_sz, GFP_KERNEL);
	if (!header)
		return -ENOMEM;

	/* Only the fixed part comes from user space; the map is kernel-built. */
	if (copy_from_user(header, u64_to_user_ptr(args->buffer), sizeof(*header))) {
		XDNA_ERR(xdna, "Failed to copy telemetry header from user");
		return -EFAULT;
	}

	header->map_num_elements = elem_num;
	list_for_each_entry(tmp_client, &xdna->client_list, node) {
		ret = amdxdna_hwctx_walk(tmp_client, &header->map,
					 aie2_fill_hwctx_map);
		if (ret)
			return ret;
	}

	/* Telemetry data lands right after the header in the user buffer. */
	ret = aie2_query_telemetry(xdna->dev_handle,
				   u64_to_user_ptr(args->buffer + header_sz),
				   telemetry_data_sz, header);
	if (ret) {
		XDNA_ERR(xdna, "Query telemetry failed ret %d", ret);
		return ret;
	}

	if (copy_to_user(u64_to_user_ptr(args->buffer), header, header_sz)) {
		XDNA_ERR(xdna, "Copy header failed");
		return -EFAULT;
	}

	return 0;
}

static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
{
	struct amdxdna_dev *xdna = client->xdna;
@@ -896,6 +966,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
	case DRM_AMDXDNA_GET_POWER_MODE:
		ret = aie2_get_power_mode(client, args);
		break;
	case DRM_AMDXDNA_QUERY_TELEMETRY:
		ret = aie2_get_telemetry(client, args);
		break;
	case DRM_AMDXDNA_QUERY_RESOURCE_INFO:
		ret = aie2_query_resource_info(client, args);
		break;
+3 −0
Original line number Diff line number Diff line
@@ -305,6 +305,9 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
/*
 * Query telemetry of header->type and copy the payload to the user buffer
 * @buf of @size bytes; on success header->major and header->minor are set
 * from the response. Returns 0 or a negative errno.
 */
int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
			 char __user *buf, u32 size,
			 struct amdxdna_drm_query_telemetry_header *header);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, void __iomem *, size_t));
int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+4 −2
Original line number Diff line number Diff line
@@ -16,16 +16,18 @@ struct xdna_notify {
	u32			*data;
	size_t			size;
	int			error;
	u32			*status;
};

#define DECLARE_XDNA_MSG_COMMON(name, op, status)			\
#define DECLARE_XDNA_MSG_COMMON(name, op, s)				\
	struct name##_req	req = { 0 };				\
	struct name##_resp	resp = { status	};			\
	struct name##_resp	resp = { .status = s };			\
	struct xdna_notify	hdl = {					\
		.error = 0,						\
		.data = (u32 *)&resp,					\
		.size = sizeof(resp),					\
		.comp = COMPLETION_INITIALIZER_ONSTACK(hdl.comp),	\
		.status = (u32 *)&resp.status,				\
	};								\
	struct xdna_mailbox_msg msg = {					\
		.send_data = (u8 *)&req,				\
Loading