Commit 71829d7f authored by Lizhi Hou
Browse files

accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported



MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.

Add driver logic to check the firmware version and, if MSG_OP_CHAIN_EXEC_NPU
is supported, use it to submit firmware commands.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251031014700.2919349-1-lizhi.hou@amd.com
parent 3668133e
Loading
Loading
Loading
Loading
+286 −157
Original line number Diff line number Diff line
@@ -27,6 +27,8 @@
#define DECLARE_AIE2_MSG(name, op) \
	DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)

/* Shorthand for the exec message ops table stored on @xdna's device handle. */
#define EXEC_MSG_OPS(xdna)	((xdna)->dev_handle->exec_msg_ops)

static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
				   struct xdna_mailbox_msg *msg)
{
@@ -433,177 +435,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
	return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
}

int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t))
static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
				 size_t *size, u32 *msg_op)
{
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	union {
		struct execute_buffer_req ebuf;
		struct exec_dpu_req dpu;
	} req;
	struct xdna_mailbox_msg msg;
	u32 payload_len;
	void *payload;
	int cu_idx;
	int ret;
	u32 op;
	struct execute_buffer_req *cu_req = req;
	u32 cmd_len;
	void *cmd;

	if (!chann)
		return -ENODEV;
	cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	if (cmd_len > sizeof(cu_req->payload))
		return -EINVAL;

	payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
	if (!payload) {
		XDNA_ERR(xdna, "Invalid command, cannot get payload");
	cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (cu_req->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	memcpy(cu_req->payload, cmd, cmd_len);

	*size = sizeof(*cu_req);
	*msg_op = MSG_OP_EXECUTE_BUFFER_CF;
	return 0;
}

	cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
	if (cu_idx < 0) {
		XDNA_DBG(xdna, "Invalid cu idx");
/*
 * Build a single-command MSG_OP_EXEC_DPU request from a start-NPU command.
 *
 * @cmd_bo: command BO whose payload is a struct amdxdna_cmd_start_npu
 * @req:    caller-provided buffer, written as a struct exec_dpu_req
 * @size:   set to the number of bytes of @req to send
 * @msg_op: set to the mailbox opcode to use
 *
 * Returns 0 on success, or -EINVAL when the payload is missing, is shorter
 * than the start-NPU header, carries more argument bytes than the request
 * payload can hold, or when no valid CU index is found.
 */
static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
				  size_t *size, u32 *msg_op)
{
	struct exec_dpu_req *dpu_req = req;
	struct amdxdna_cmd_start_npu *sn;
	u32 cmd_len;
	u32 arg_sz;

	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	/* Reject short payloads explicitly rather than via unsigned wrap. */
	if (!sn || cmd_len < sizeof(*sn))
		return -EINVAL;

	arg_sz = cmd_len - sizeof(*sn);
	if (arg_sz > sizeof(dpu_req->payload))
		return -EINVAL;

	/*
	 * The full request is sent, but only arg_sz bytes of the payload are
	 * written below. Clear it first so stale caller memory is never sent.
	 */
	memset(dpu_req, 0, sizeof(*dpu_req));

	dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (dpu_req->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	dpu_req->inst_buf_addr = sn->buffer;
	dpu_req->inst_size = sn->buffer_size;
	dpu_req->inst_prop_cnt = sn->prop_count;
	memcpy(dpu_req->payload, sn->prop_args, arg_sz);

	*size = sizeof(*dpu_req);
	*msg_op = MSG_OP_EXEC_DPU;
	return 0;
}

	op = amdxdna_cmd_get_op(cmd_abo);
	switch (op) {
	case ERT_START_CU:
		if (unlikely(payload_len > sizeof(req.ebuf.payload)))
			XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
		req.ebuf.cu_idx = cu_idx;
		memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
		msg.send_size = sizeof(req.ebuf);
		msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
		break;
	case ERT_START_NPU: {
		struct amdxdna_cmd_start_npu *sn = payload;

		if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
			XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
		req.dpu.inst_buf_addr = sn->buffer;
		req.dpu.inst_size = sn->buffer_size;
		req.dpu.inst_prop_cnt = sn->prop_count;
		req.dpu.cu_idx = cu_idx;
		memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
		msg.send_size = sizeof(req.dpu);
		msg.opcode = MSG_OP_EXEC_DPU;
		break;
/*
 * Fill @req, interpreted as a struct cmd_chain_req, with the device address
 * and byte size of the command buffer and the number of chained commands.
 */
static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
{
	struct cmd_chain_req *chain = req;

	chain->count = cmd_cnt;
	chain->buf_size = size;
	chain->buf_addr = slot_addr;
}
	default:
		XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
		return -EINVAL;

/*
 * Fill @req, interpreted as a struct cmd_chain_npu_req, with the device
 * address and byte size of the command buffer and the number of chained
 * commands. The flags and reserved words are cleared.
 */
static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
{
	struct cmd_chain_npu_req *chain = req;

	chain->count = cmd_cnt;
	chain->buf_size = size;
	chain->buf_addr = slot_addr;
	chain->reserved = 0;
	chain->flags = 0;
}
	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
			     0x40, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(xdna, "Send message failed");
		return ret;
/*
 * Write one struct cmd_chain_slot_execbuf_cf for @cmd_bo at @slot.
 *
 * On entry *@size is the space available at @slot; on success it is replaced
 * by the number of bytes the slot occupies (header plus arguments).
 *
 * Returns 0, or -EINVAL if the slot does not fit or the command has no valid
 * CU index.
 *
 * NOTE(review): the payload pointer is not NULL-checked here; confirm that
 * amdxdna_cmd_get_payload() reports a zero length whenever it returns NULL.
 */
static int
aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_execbuf_cf *entry = slot;
	size_t slot_sz;
	u32 args_len;
	void *args;

	args = amdxdna_cmd_get_payload(cmd_bo, &args_len);
	slot_sz = sizeof(*entry) + args_len;
	if (slot_sz > *size)
		return -EINVAL;

	entry->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (entry->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	entry->arg_cnt = args_len / sizeof(u32);
	memcpy(entry->args, args, args_len);

	/* Accurate slot size to hint firmware to do necessary copy */
	*size = slot_sz;
	return 0;
}

/*
 * Write one struct cmd_chain_slot_dpu for @cmd_bo at @slot.
 *
 * The command payload is read as a struct amdxdna_cmd_start_npu; whatever
 * follows that header is copied as the slot arguments.
 *
 * On entry *@size is the space available at @slot; on success it is replaced
 * by the number of bytes the slot occupies (header plus arguments).
 *
 * Returns 0, or -EINVAL if the payload is shorter than the header, the
 * arguments exceed MAX_DPU_ARGS_SIZE, the slot does not fit, or the command
 * has no valid CU index.
 */
static int
aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_dpu *entry = slot;
	struct amdxdna_cmd_start_npu *start;
	size_t slot_sz;
	u32 payload_len;
	u32 args_len;

	start = amdxdna_cmd_get_payload(cmd_bo, &payload_len);
	if (payload_len < sizeof(*start))
		return -EINVAL;

	args_len = payload_len - sizeof(*start);
	if (args_len > MAX_DPU_ARGS_SIZE)
		return -EINVAL;

	slot_sz = sizeof(*entry) + args_len;
	if (slot_sz > *size)
		return -EINVAL;

	entry->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (entry->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	entry->inst_buf_addr = start->buffer;
	entry->inst_size = start->buffer_size;
	entry->inst_prop_cnt = start->prop_count;
	entry->arg_cnt = args_len / sizeof(u32);
	memcpy(entry->args, start->prop_args, args_len);

	/* Accurate slot size to hint firmware to do necessary copy */
	*size = slot_sz;
	return 0;
}

/*
 * Map an ERT command opcode to the legacy chained-exec mailbox opcode.
 * Returns MSG_OP_MAX_OPCODE for any opcode other than ERT_START_CU or
 * ERT_START_NPU.
 */
static u32 aie2_get_chain_msg_op(u32 cmd_op)
{
	if (cmd_op == ERT_START_CU)
		return MSG_OP_CHAIN_EXEC_BUFFER_CF;

	if (cmd_op == ERT_START_NPU)
		return MSG_OP_CHAIN_EXEC_DPU;

	return MSG_OP_MAX_OPCODE;
}

/*
 * Exec message ops that use the per-type chain opcodes
 * (MSG_OP_CHAIN_EXEC_BUFFER_CF / MSG_OP_CHAIN_EXEC_DPU) and their slot layouts.
 */
static struct aie2_exec_msg_ops legacy_exec_message_ops = {
	.init_cu_req = aie2_init_exec_cu_req,
	.init_dpu_req = aie2_init_exec_dpu_req,
	.init_chain_req = aie2_init_exec_chain_req,
	.fill_cf_slot = aie2_cmdlist_fill_cf,
	.fill_dpu_slot = aie2_cmdlist_fill_dpu,
	.get_chain_msg_op = aie2_get_chain_msg_op,
};

static int
aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
			      struct amdxdna_gem_obj *abo, u32 *size)
aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
	int cu_idx = amdxdna_cmd_get_cu_idx(abo);
	u32 payload_len;
	void *payload;
	struct cmd_chain_slot_npu *npu_slot = slot;
	u32 cmd_len;
	void *cmd;

	if (cu_idx < 0)
	cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	if (*size < sizeof(*npu_slot) + cmd_len)
		return -EINVAL;

	payload = amdxdna_cmd_get_payload(abo, &payload_len);
	if (!payload)
	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (npu_slot->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	if (!slot_has_space(*buf, offset, payload_len))
		return -ENOSPC;
	memset(npu_slot, 0, sizeof(*npu_slot));
	npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
	npu_slot->arg_cnt = cmd_len / sizeof(u32);
	memcpy(npu_slot->args, cmd, cmd_len);

	buf->cu_idx = cu_idx;
	buf->arg_cnt = payload_len / sizeof(u32);
	memcpy(buf->args, payload, payload_len);
	/* Accurate buf size to hint firmware to do necessary copy */
	*size = sizeof(*buf) + payload_len;
	*size = sizeof(*npu_slot) + cmd_len;
	return 0;
}

static int
aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
			       struct amdxdna_gem_obj *abo, u32 *size)
aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
	struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
	int cu_idx = amdxdna_cmd_get_cu_idx(abo);
	struct cmd_chain_slot_npu *npu_slot = slot;
	struct amdxdna_cmd_start_npu *sn;
	u32 payload_len;
	void *payload;
	u32 cmd_len;
	u32 arg_sz;

	if (cu_idx < 0)
	sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
	arg_sz = cmd_len - sizeof(*sn);
	if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
		return -EINVAL;

	payload = amdxdna_cmd_get_payload(abo, &payload_len);
	if (!payload)
		return -EINVAL;
	sn = payload;
	arg_sz = payload_len - sizeof(*sn);
	if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
	if (*size < sizeof(*npu_slot) + arg_sz)
		return -EINVAL;

	if (!slot_has_space(*buf, offset, arg_sz))
		return -ENOSPC;
	npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
	if (npu_slot->cu_idx == INVALID_CU_IDX)
		return -EINVAL;

	buf->inst_buf_addr = sn->buffer;
	buf->inst_size = sn->buffer_size;
	buf->inst_prop_cnt = sn->prop_count;
	buf->cu_idx = cu_idx;
	buf->arg_cnt = arg_sz / sizeof(u32);
	memcpy(buf->args, sn->prop_args, arg_sz);
	memset(npu_slot, 0, sizeof(*npu_slot));
	npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
	npu_slot->inst_buf_addr = sn->buffer;
	npu_slot->inst_size = sn->buffer_size;
	npu_slot->inst_prop_cnt = sn->prop_count;
	npu_slot->arg_cnt = arg_sz / sizeof(u32);
	memcpy(npu_slot->args, sn->prop_args, arg_sz);

	/* Accurate buf size to hint firmware to do necessary copy */
	*size = sizeof(*buf) + arg_sz;
	*size = sizeof(*npu_slot) + arg_sz;
	return 0;
}

static int
aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
			   struct amdxdna_gem_obj *abo, u32 *size)
/* Always returns MSG_OP_CHAIN_EXEC_NPU; @cmd_op is not consulted. */
static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
{
	return MSG_OP_CHAIN_EXEC_NPU;
}

/*
 * Exec message ops that submit command chains with MSG_OP_CHAIN_EXEC_NPU and
 * the struct cmd_chain_slot_npu slot layout. The single-command request
 * builders are the same ones used by the legacy table.
 */
static struct aie2_exec_msg_ops npu_exec_message_ops = {
	.init_cu_req = aie2_init_exec_cu_req,
	.init_dpu_req = aie2_init_exec_dpu_req,
	.init_chain_req = aie2_init_npu_chain_req,
	.fill_cf_slot = aie2_cmdlist_fill_npu_cf,
	.fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
	.get_chain_msg_op = aie2_get_npu_chain_msg_op,
};

static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
			      size_t *size, u32 *msg_op)
{
	u32 this_op = amdxdna_cmd_get_op(abo);
	void *cmd_buf = cmdbuf_abo->mem.kva;
	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
	int ret;
	u32 op;

	if (this_op != op) {
		ret = -EINVAL;
		goto done;
	}

	op = amdxdna_cmd_get_op(cmd_abo);
	switch (op) {
	case ERT_START_CU:
		ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
		ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
		if (ret) {
			XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
			return ret;
		}
		break;
	case ERT_START_NPU:
		ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
		ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
		if (ret) {
			XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
			return ret;
		}

		break;
	default:
		XDNA_ERR(xdna, "Unsupported op %d", op);
		ret = -EOPNOTSUPP;
		break;
	}

done:
	if (ret) {
		XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
			 op, ret);
	return ret;
}

/*
 * Fill one command-list slot at @slot from @cmd_abo using the device's exec
 * message ops.
 *
 * @size:   in: space available at @slot; passed through to the fill op
 * @cmd_op: in/out: ERT_INVALID_CMD on the first call, in which case it is set
 *          to this command's opcode; otherwise the command's opcode must
 *          match it
 *
 * Returns the fill op's result, -EINVAL on an opcode mismatch, or
 * -EOPNOTSUPP for an opcode other than ERT_START_CU / ERT_START_NPU.
 */
static int
aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
		       size_t *size, u32 *cmd_op)
{
	struct amdxdna_dev *xdna = cmd_abo->client->xdna;
	u32 this_op = amdxdna_cmd_get_op(cmd_abo);

	if (*cmd_op == ERT_INVALID_CMD)
		*cmd_op = this_op;
	else if (this_op != *cmd_op)
		return -EINVAL;

	switch (this_op) {
	case ERT_START_CU:
		return EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
	case ERT_START_NPU:
		return EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
	default:
		break;
	}

	XDNA_INFO(xdna, "Unsupported op %d", this_op);
	return -EOPNOTSUPP;
}

/*
 * Select the exec message ops for @ndev: the NPU table when the
 * AIE2_NPU_COMMAND feature bit is set, the legacy table otherwise.
 */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
{
	if (!AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND)) {
		ndev->exec_msg_ops = &legacy_exec_message_ops;
		return;
	}

	ndev->exec_msg_ops = &npu_exec_message_ops;
}

static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
@@ -612,29 +728,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
	return job->hwctx->priv->cmd_buf[idx];
}

static void
aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
			     struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
		 int (*notify_cb)(void *, void __iomem *, size_t))
{
	req->buf_addr = cmdbuf_abo->mem.dev_addr;
	req->buf_size = size;
	req->count = cnt;
	drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
	XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
		 req->buf_addr, size, cnt);
}
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct xdna_mailbox_msg msg;
	union exec_req req;
	int ret;

static inline u32
aie2_cmd_op_to_msg_op(u32 op)
{
	switch (op) {
	case ERT_START_CU:
		return MSG_OP_CHAIN_EXEC_BUFFER_CF;
	case ERT_START_NPU:
		return MSG_OP_CHAIN_EXEC_DPU;
	default:
		return MSG_OP_MAX_OPCODE;
	if (!chann)
		return -ENODEV;

	ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
	if (ret)
		return ret;

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
			     0x40, false);

	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(xdna, "Send message failed");
		return ret;
	}

	return 0;
}

int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
@@ -645,12 +768,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_client *client = hwctx->client;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_cmd_chain *payload;
	struct xdna_mailbox_msg msg;
	struct cmd_chain_req req;
	union exec_chain_req req;
	u32 payload_len;
	u32 offset = 0;
	u32 size;
	size_t size;
	int ret;
	u32 op;
	u32 i;
@@ -661,41 +785,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
	    payload_len < struct_size(payload, data, payload->command_count))
		return -EINVAL;

	op = ERT_INVALID_CMD;
	for (i = 0; i < payload->command_count; i++) {
		u32 boh = (u32)(payload->data[i]);
		struct amdxdna_gem_obj *abo;

		abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
		if (!abo) {
			XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
			XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
			return -ENOENT;
		}

		/* All sub-cmd should have same op, use the first one. */
		if (i == 0)
			op = amdxdna_cmd_get_op(abo);

		ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
		size = cmdbuf_abo->mem.size - offset;
		ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
					     abo, &size, &op);
		amdxdna_gem_put_obj(abo);
		if (ret)
			return -EINVAL;
			return ret;

		offset += size;
	}
	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;

	/* The offset is the accumulated total size of the cmd buffer */
	aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
	EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
					   offset, payload->command_count);
	drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);

	msg.opcode = aie2_cmd_op_to_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;
	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
	msg.send_size = sizeof(req);
	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
	if (ret) {
		XDNA_ERR(hwctx->client->xdna, "Send message failed");
		XDNA_ERR(xdna, "Send message failed");
		return ret;
	}

@@ -708,23 +833,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
{
	struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
	struct mailbox_channel *chann = hwctx->priv->mbox_chann;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
	struct xdna_mailbox_msg msg;
	struct cmd_chain_req req;
	u32 size;
	union exec_chain_req req;
	u32 op = ERT_INVALID_CMD;
	size_t size;
	int ret;
	u32 op;

	op = amdxdna_cmd_get_op(cmd_abo);
	ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
	size = cmdbuf_abo->mem.size;
	ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
	if (ret)
		return ret;

	aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);

	msg.opcode = aie2_cmd_op_to_msg_op(op);
	msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
	if (msg.opcode == MSG_OP_MAX_OPCODE)
		return -EOPNOTSUPP;

	EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
					   size, 1);
	drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);

	msg.handle = job;
	msg.notify_cb = notify_cb;
	msg.send_data = (u8 *)&req;
+39 −3
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ enum aie2_msg_opcode {
	MSG_OP_CHAIN_EXEC_BUFFER_CF        = 0x12,
	MSG_OP_CHAIN_EXEC_DPU              = 0x13,
	MSG_OP_CONFIG_DEBUG_BO             = 0x14,
	MSG_OP_CHAIN_EXEC_NPU              = 0x18,
	MSG_OP_MAX_XRT_OPCODE,
	MSG_OP_SUSPEND                     = 0x101,
	MSG_OP_RESUME                      = 0x102,
@@ -149,6 +150,16 @@ struct exec_dpu_req {
	__u32	payload[35];
} __packed;

/* Values for the type field of struct cmd_chain_slot_npu. */
enum exec_npu_type {
	EXEC_NPU_TYPE_NON_ELF		= 0x1,
	EXEC_NPU_TYPE_PARTIAL_ELF	= 0x2,
};

/* Storage large enough for either single-command execute request. */
union exec_req {
	struct execute_buffer_req ebuf;
	struct exec_dpu_req dpu_req;
};

struct execute_buffer_resp {
	enum aie2_msg_status	status;
} __packed;
@@ -320,9 +331,6 @@ struct async_event_msg_resp {
} __packed;

#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
#define slot_has_space(slot, offset, payload_size)		\
	(MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) +	\
	 sizeof(typeof(slot)))

struct cmd_chain_slot_execbuf_cf {
	__u32 cu_idx;
@@ -340,12 +348,40 @@ struct cmd_chain_slot_dpu {
	__u32 args[] __counted_by(arg_cnt);
};

/* Maximum size in bytes of the args[] area: 26 32-bit words. */
#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
/*
 * One command-chain slot in the unified NPU layout: a fixed header followed
 * by arg_cnt 32-bit argument words.
 */
struct cmd_chain_slot_npu {
	enum exec_npu_type type;
	u64 inst_buf_addr;
	u64 save_buf_addr;
	u64 restore_buf_addr;
	u32 inst_size;
	u32 save_size;
	u32 restore_size;
	u32 inst_prop_cnt;
	u32 cu_idx;
	u32 arg_cnt;
	/* Trailing arguments; length is given by arg_cnt. */
	u32 args[] __counted_by(arg_cnt);
} __packed;

/* Chained-exec request: where the command buffer is and how many commands it holds. */
struct cmd_chain_req {
	__u64 buf_addr;
	__u32 buf_size;
	__u32 count;
} __packed;

/*
 * Chained-exec request in the NPU layout: the buffer address, size and
 * command count preceded by a flags word and a reserved word.
 */
struct cmd_chain_npu_req {
	u32 flags;
	u32 reserved;
	u64 buf_addr;
	u32 buf_size;
	u32 count;
} __packed;

/* Storage large enough for either chained-exec request layout. */
union exec_chain_req {
	struct cmd_chain_npu_req npu_req;
	struct cmd_chain_req req;
};

struct cmd_chain_resp {
	enum aie2_msg_status	status;
	__u32			fail_cmd_idx;
+13 −0
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {

static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
	const struct aie2_fw_feature_tbl *feature;
	struct amdxdna_dev *xdna = ndev->xdna;

	/*
@@ -78,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
		return -EINVAL;
	}

	for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
	     feature++) {
		if (fw_minor < feature->min_minor)
			continue;
		if (feature->max_minor > 0 && fw_minor > feature->max_minor)
			continue;

		set_bit(feature->feature, &ndev->feature_mask);
	}

	return 0;
}

@@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
	}

	release_firmware(fw);
	aie2_msg_init(ndev);
	amdxdna_pm_init(xdna);
	return 0;

+29 −0
Original line number Diff line number Diff line
@@ -156,6 +156,17 @@ enum aie2_dev_status {
	AIE2_DEV_START,
};

/*
 * Callbacks used to build execute messages. A device handle points at one
 * table of these through its exec_msg_ops member.
 */
struct aie2_exec_msg_ops {
	/* Build a single-command CU request in @req; sets *size and *msg_op. */
	int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			   size_t *size, u32 *msg_op);
	/* Build a single-command DPU request in @req; sets *size and *msg_op. */
	int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
			    size_t *size, u32 *msg_op);
	/* Build the chained-exec request describing the filled command buffer. */
	void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
	/* Fill one chain slot; *size is space available in, bytes used out. */
	int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
	/* Map an ERT command opcode to the mailbox opcode for a chain. */
	u32 (*get_chain_msg_op)(u32 cmd_op);
};

struct amdxdna_dev_hdl {
	struct amdxdna_dev		*xdna;
	const struct amdxdna_dev_priv	*priv;
@@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
	u32				total_col;
	struct aie_version		version;
	struct aie_metadata		metadata;
	unsigned long			feature_mask;
	struct aie2_exec_msg_ops	*exec_msg_ops;

	/* power management and clock*/
	enum amdxdna_power_mode_type	pw_mode;
@@ -206,12 +219,26 @@ struct aie2_hw_ops {
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};

/* Bit positions within the device handle's feature_mask. */
enum aie2_fw_feature {
	AIE2_NPU_COMMAND,
	AIE2_FEATURE_MAX
};

/*
 * One entry of a firmware feature table: @feature is enabled when the
 * firmware minor version is at least @min_minor and, if @max_minor is
 * non-zero, at most @max_minor. aie2_check_protocol() stops walking the
 * table at the first entry whose @min_minor is 0.
 */
struct aie2_fw_feature_tbl {
	enum aie2_fw_feature feature;
	u32 max_minor;
	u32 min_minor;
};

/* True when bit @feature is set in @ndev's feature_mask. */
#define AIE2_FEATURE_ON(ndev, feature)	test_bit(feature, &(ndev)->feature_mask)

struct amdxdna_dev_priv {
	const char			*fw_path;
	u64				protocol_major;
	u64				protocol_minor;
	const struct rt_config		*rt_config;
	const struct dpm_clk_freq	*dpm_clk_tbl;
	const struct aie2_fw_feature_tbl *fw_feature_tbl;

#define COL_ALIGN_NONE   0
#define COL_ALIGN_NATURE 1
@@ -236,6 +263,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];

/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
@@ -260,6 +288,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
			       struct amdxdna_drm_get_array *args);

/* aie2_message.c */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+3 −3
Original line number Diff line number Diff line
@@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
	return &cmd->data[num_masks];
}

int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks, i;
	u32 *cu_mask;

	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
		return -1;
		return INVALID_CU_IDX;

	num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
	cu_mask = cmd->data;
@@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
			return ffs(cu_mask[i]) - 1;
	}

	return -1;
	return INVALID_CU_IDX;
}

/*
Loading