Commit d68beb27 authored by Nicolin Chen, committed by Jason Gunthorpe
Browse files

iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object

Implement the vIOMMU's cache_invalidate op for user space to invalidate
the IOTLB entries, Device ATS and CD entries that are cached by hardware.

Add struct iommu_viommu_arm_smmuv3_invalidate defining invalidation
entries that are simply in the native format of a 128-bit TLBI
command. Scan those commands against the permitted command list and fix
their VMID/SID fields to match what is stored in the vIOMMU.

Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com


Co-developed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent f27298a8
Loading
Loading
Loading
Loading
+134 −0
Original line number Diff line number Diff line
@@ -215,8 +215,134 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
	return &nested_domain->domain;
}

/*
 * Translate a virtual stream ID into the stream ID of the device that
 * iommufd_viommu_find_dev() returns for it on this vIOMMU.
 *
 * Both the lookup and the read of the master's stream entry happen with the
 * vdevs xarray lock held.
 *
 * Returns 0 on success, storing the ID through @sid when @sid is not NULL, or
 * -EIO when no device is found for @vsid.
 */
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct device *vdev;
	int rc = -EIO;

	xa_lock(&vsmmu->core.vdevs);
	vdev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (vdev) {
		struct arm_smmu_master *master = dev_iommu_priv_get(vdev);

		/*
		 * iommufd currently supports only PCI devices with a single
		 * SID, so the first stream entry is the one to report.
		 */
		if (sid)
			*sid = master->streams[0].id;
		rc = 0;
	}
	xa_unlock(&vsmmu->core.vdevs);

	return rc;
}

/*
 * Kernel-side view of one user invalidation entry. The union overlays the UAPI
 * struct iommu_viommu_arm_smmuv3_invalidate (two little-endian 64-bit words)
 * with a plain u64[2], so a command copied from user space can be converted
 * to CPU endian in place, without a second buffer.
 */
struct arm_vsmmu_invalidation_cmd {
	union {
		/* Command words once converted to CPU endian */
		u64 cmd[2];
		/* Raw little-endian command words as supplied by user space */
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Rewrite one user-supplied invalidation command, in place, into the form
 * handed to arm_smmu_cmdq_issue_cmdlist(): both 64-bit words are converted
 * from little-endian to CPU endian, and the VMID or SID field is replaced so
 * that user space does not get to choose it.
 *
 * TLBI commands receive the vIOMMU's VMID; TLBI_NSNH_ALL is turned into
 * TLBI_NH_ALL for that VMID. ATC_INV, CFGI_CD and CFGI_CD_ALL have their
 * virtual SID replaced by the SID found by arm_vsmmu_vsid_to_sid().
 *
 * Returns 0 on success, or -EIO for an opcode outside the permitted list or
 * for a virtual SID that cannot be resolved.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	u32 vsid, sid;

	/* User space supplies le64 words; hold them as CPU-endian u64 from here */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		/* Keep the command as is, but force the VMID field */
		cmd->cmd[0] = (cmd->cmd[0] & ~CMDQ_TLBI_0_VMID) |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		return 0;

	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Rebuilt from scratch as NH_ALL restricted to this VMID */
		cmd->cmd[1] = 0;
		cmd->cmd[0] = FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid) |
			      CMDQ_OP_TLBI_NH_ALL;
		return 0;

	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL:
		/* The SID field carries a virtual SID; swap in the real one */
		vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] = (cmd->cmd[0] & ~CMDQ_CFGI_0_SID) |
			      FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		return 0;

	default:
		return -EIO;
	}
}

/*
 * vIOMMU cache_invalidate op: issue a user-supplied array of invalidation
 * commands on the SMMU's main command queue.
 *
 * The whole user array is copied into a kernel buffer. Each entry is then
 * converted and sanitized in place by arm_vsmmu_convert_user_cmd(), and the
 * converted commands are issued in batches of at most CMDQ_BATCH_ENTRIES - 1,
 * with a sync requested for every batch.
 *
 * On return, array->entry_num is overwritten with the number of leading
 * entries reported as handled:
 *  - 0 if the copy from user space failed;
 *  - the index of the offending entry if a command failed conversion. The
 *    converted commands before it are issued first so that this count is
 *    accurate;
 *  - one less than the number of entries consumed so far if issuing a batch
 *    failed;
 *  - the input entry count on success.
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
 * error from the copy, conversion or command-queue step that failed.
 */
static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
				      struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	/* The user entry and the internal two-word command must overlay exactly */
	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	/* [last, cur) is the run of converted commands not yet issued */
	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret) {
			int flush_ret;

			if (cur == last)
				goto out;

			/*
			 * entry_num is about to report every entry before cur
			 * as handled, so the converted-but-pending commands in
			 * [last, cur) must reach the hardware first. Without
			 * this they would be dropped while user space is told
			 * they were executed.
			 */
			flush_ret = arm_smmu_cmdq_issue_cmdlist(
				smmu, &smmu->cmdq, last->cmd, cur - last, true);
			if (flush_ret) {
				/* Same accounting as a failed batch below */
				ret = flush_ret;
				cur--;
			}
			goto out;
		}

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		/* Keep accumulating until the batch is full or the array ends */
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	/* Report back to the caller how many entries were consumed */
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}

/*
 * vIOMMU callbacks provided by this driver: nested domain allocation and
 * user-space initiated cache invalidation.
 */
static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
@@ -239,6 +365,14 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
+3 −3
Original line number Diff line number Diff line
@@ -766,9 +766,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       struct arm_smmu_cmdq *cmdq,
				       u64 *cmds, int n, bool sync)
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
				bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
+5 −0
Original line number Diff line number Diff line
@@ -529,6 +529,7 @@ struct arm_smmu_cmdq_ent {
		#define CMDQ_OP_TLBI_NH_ALL     0x10
		#define CMDQ_OP_TLBI_NH_ASID	0x11
		#define CMDQ_OP_TLBI_NH_VA	0x12
		#define CMDQ_OP_TLBI_NH_VAA	0x13
		#define CMDQ_OP_TLBI_EL2_ALL	0x20
		#define CMDQ_OP_TLBI_EL2_ASID	0x21
		#define CMDQ_OP_TLBI_EL2_VA	0x22
@@ -951,6 +952,10 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state);
void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
				  const struct arm_smmu_ste *target);

int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
				bool sync);

#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
+24 −0
Original line number Diff line number Diff line
@@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap {
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
};

/**
@@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate {
	__u32 __reserved;
};

/**
 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
 *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
 *       Must be little-endian.
 *
 * Supported command list only when passing in a vIOMMU via @hwpt_id:
 *     CMDQ_OP_TLBI_NSNH_ALL
 *     CMDQ_OP_TLBI_NH_VA
 *     CMDQ_OP_TLBI_NH_VAA
 *     CMDQ_OP_TLBI_NH_ALL
 *     CMDQ_OP_TLBI_NH_ASID
 *     CMDQ_OP_ATC_INV
 *     CMDQ_OP_CFGI_CD
 *     CMDQ_OP_CFGI_CD_ALL
 *
 * The VMID field of the TLBI commands is overwritten by the kernel with the
 * VMID of the vIOMMU, and CMDQ_OP_TLBI_NSNH_ALL is issued as
 * CMDQ_OP_TLBI_NH_ALL for that VMID. The SID field of CMDQ_OP_ATC_INV,
 * CMDQ_OP_CFGI_CD and CMDQ_OP_CFGI_CD_ALL is treated as a virtual SID and is
 * replaced by the SID of the matching device on the vIOMMU.
 *
 * -EIO will be returned if the command is not supported, or if the virtual
 * SID cannot be resolved to a device on the vIOMMU.
 */
struct iommu_viommu_arm_smmuv3_invalidate {
	__aligned_le64 cmd[2];
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)