Commit 1e8be08d authored by Jason Gunthorpe
Browse files

iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED

For SMMUv3 an IOMMU_DOMAIN_NESTED is composed of an S2 iommu_domain acting
as the parent and a user provided STE fragment that defines the CD table
and related data with addresses translated by the S2 iommu_domain.

The kernel only permits userspace to control certain allowed bits of the
STE that are safe for user/guest control.

IOTLB maintenance is a bit subtle here: the S1 implicitly includes the S2
translation, but there is no way of knowing which S1 entries refer to a
range of S2.

For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to
flush all ASIDs from the VMID after flushing the S2 on any change to the
S2.

The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the
invalidation path relies on the VIOMMU to translate the virtual stream IDs
used in the invalidation commands for the CD table and ATS.

Link: https://patch.msgid.link/r/9-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com


Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 69d9b312
Loading
Loading
Loading
Loading
+163 −0
Original line number Diff line number Diff line
@@ -30,7 +30,170 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
	return info;
}

/*
 * Build the physical STE for a nested domain whose vSTE selects S1
 * translation.
 *
 * The STE starts out as the S2 STE of the vSMMU's parent domain. Word 0 is
 * then replaced by V | Cfg=NESTED combined with the stored vSTE word 0 (with
 * the vSTE's own Cfg field masked off), and the stored vSTE word 1 is OR'd
 * into whatever the S2 STE already placed in word 1.
 */
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(target, master,
				    nested_domain->vsmmu->s2_parent,
				    ats_enabled);

	/* The kernel chooses Cfg; everything else in word 0 is the vSTE's */
	target->data[0] =
		cpu_to_le64(STRTAB_STE_0_V |
			    FIELD_PREP(STRTAB_STE_0_CFG,
				       STRTAB_STE_0_CFG_NESTED)) |
		(nested_domain->ste[0] & ~cpu_to_le64(STRTAB_STE_0_CFG));
	target->data[1] |= nested_domain->ste[1];
}

/*
 * Translate the vSTE that userspace supplied at nested domain creation into
 * the physical STE to install. Depending on the vSTE the result is:
 * - Non-valid vSTE (V clear): abort STE
 * - Abort vSTE: abort STE
 * - Bypass vSTE: the S2 of the nesting parent only, no CD table
 * - S1 translate vSTE: the S2 of the nesting parent plus the userspace CD
 *   table
 * Any other Cfg value also results in an abort STE.
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg;

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V))
		cfg = FIELD_GET(STRTAB_STE_0_CFG,
				le64_to_cpu(nested_domain->ste[0]));
	else
		cfg = STRTAB_STE_0_CFG_ABORT;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS) {
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		return;
	}

	if (cfg == STRTAB_STE_0_CFG_BYPASS) {
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		return;
	}

	/* STRTAB_STE_0_CFG_ABORT and anything unrecognised */
	arm_smmu_make_abort_ste(target);
}

/*
 * Attach @dev to a nested domain.
 *
 * Returns -EINVAL when the domain's vSMMU belongs to a different SMMU instance
 * than the device's master, -EBUSY when arm_smmu_ssids_in_use() reports SSIDs
 * in use on the master's CD table, the error from arm_smmu_attach_prepare() if
 * it fails, and 0 on success.
 *
 * The prepare / install STE / commit sequence runs under arm_smmu_asid_lock.
 */
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = iommu_get_domain_for_dev(dev),
		.ssid = IOMMU_NO_PASID,
		/* Currently invalidation of ATC is not supported */
		.disable_ats = true,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);

	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret)
		goto out_unlock;

	/* Build the physical STE from the stored vSTE and make it live */
	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);

out_unlock:
	mutex_unlock(&arm_smmu_asid_lock);
	return ret;
}

/* Release the arm_smmu_nested_domain that embeds @domain. */
static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);

	kfree(nested_domain);
}

/* Domain ops installed on every IOMMU_DOMAIN_NESTED created by this driver. */
static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.free = arm_smmu_domain_nested_free,
	.attach_dev = arm_smmu_attach_dev_nested,
};

/*
 * Check the vSTE words that userspace passed in @arg.
 *
 * A vSTE without V set is accepted and normalised to all zeroes. A valid vSTE
 * must only use bits from the NESTING_ALLOWED masks and must select an Abort,
 * Bypass or S1 translate configuration.
 *
 * Returns 0 if the vSTE is acceptable, -EIO otherwise.
 */
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg)
{
	/* Non-valid vSTE: none of the other bits are meaningful, drop them */
	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if (arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED)
		return -EIO;
	if (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED)
		return -EIO;

	switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]))) {
	case STRTAB_STE_0_CFG_ABORT:
	case STRTAB_STE_0_CFG_BYPASS:
	case STRTAB_STE_0_CFG_S1_TRANS:
		return 0;
	default:
		return -EIO;
	}
}

/*
 * Allocate an IOMMU_DOMAIN_NESTED on behalf of the vSMMU behind @viommu.
 *
 * The vSTE is copied from @user_data and validated, then stored in the new
 * domain for use at attach time. STRTAB_STE_1_EATS is always cleared from the
 * stored copy of word 1.
 *
 * Returns the new domain, or an ERR_PTR():
 *   -EOPNOTSUPP  @flags contains anything other than IOMMU_HWPT_FAULT_ID_VALID
 *   -ENOMEM      allocation failure
 *   other        whatever iommu_copy_struct_from_user() or
 *                arm_smmu_validate_vste() reported
 */
static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID;
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	int ret;

	/*
	 * Faults delivered to the nested domain are faults that originated by
	 * the S1 in the domain. The core code will match all PASIDs when
	 * delivering the fault due to user_pasid_table
	 */
	if (flags & ~SUPPORTED_FLAGS)
		return ERR_PTR(-EOPNOTSUPP);

	/* Fetch the vSTE, then vet it; the first failure wins */
	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (!ret)
		ret = arm_smmu_validate_vste(&arg);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->vsmmu = container_of(viommu, struct arm_vsmmu, core);
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;

	return &nested_domain->domain;
}

/*
 * viommu callbacks for the ARM vSMMU. Only nested domain allocation is
 * provided here.
 */
static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
};

struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
+16 −1
Original line number Diff line number Diff line
@@ -295,6 +295,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
@@ -2230,6 +2231,15 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
	}
	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);

	if (smmu_domain->nest_parent) {
		/*
		 * When the S2 domain changes all the nested S1 ASIDs have to be
		 * flushed too.
		 */
		cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
	}

	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
@@ -2644,6 +2654,8 @@ to_smmu_domain_devices(struct iommu_domain *domain)
	if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
	    domain->type == IOMMU_DOMAIN_SVA)
		return to_smmu_domain(domain);
	if (domain->type == IOMMU_DOMAIN_NESTED)
		return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
	return NULL;
}

@@ -2716,7 +2728,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
		 * enabled if we have arm_smmu_domain, those always have page
		 * tables.
		 */
		state->ats_enabled = arm_smmu_ats_supported(master);
		state->ats_enabled = !state->disable_ats &&
				     arm_smmu_ats_supported(master);
	}

	if (smmu_domain) {
@@ -3122,6 +3135,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
			goto err_free;
		}
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
		smmu_domain->nest_parent = true;
	}

	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
@@ -3518,6 +3532,7 @@ static struct iommu_ops arm_smmu_ops = {
	.page_response		= arm_smmu_page_response,
	.def_domain_type	= arm_smmu_def_domain_type,
	.viommu_alloc		= arm_vsmmu_alloc,
	.user_pasid_table	= 1,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
+26 −0
Original line number Diff line number Diff line
@@ -244,6 +244,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_0_CFG_BYPASS		4
#define STRTAB_STE_0_CFG_S1_TRANS	5
#define STRTAB_STE_0_CFG_S2_TRANS	6
#define STRTAB_STE_0_CFG_NESTED		7

#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR	0
@@ -295,6 +296,15 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)

#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)

/*
 * These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED.
 *
 * Both masks are already wrapped in cpu_to_le64(), so they are meant to be
 * applied directly to the little-endian words of a user provided vSTE without
 * converting those words first.
 */
#define STRTAB_STE_0_NESTING_ALLOWED                                         \
	cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \
		    STRTAB_STE_0_S1CTXPTR_MASK | STRTAB_STE_0_S1CDMAX)
#define STRTAB_STE_1_NESTING_ALLOWED                            \
	cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |   \
		    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |   \
		    STRTAB_STE_1_S1STALLD)

/*
 * Context descriptors.
 *
@@ -514,6 +524,7 @@ struct arm_smmu_cmdq_ent {
			};
		} cfgi;

		#define CMDQ_OP_TLBI_NH_ALL     0x10
		#define CMDQ_OP_TLBI_NH_ASID	0x11
		#define CMDQ_OP_TLBI_NH_VA	0x12
		#define CMDQ_OP_TLBI_EL2_ALL	0x20
@@ -815,10 +826,18 @@ struct arm_smmu_domain {
	struct list_head		devices;
	spinlock_t			devices_lock;
	bool				enforce_cache_coherency : 1;
	bool				nest_parent : 1;

	struct mmu_notifier		mmu_notifier;
};

/*
 * Driver state for an IOMMU_DOMAIN_NESTED.
 *
 * @domain: the embedded core iommu_domain; to_smmu_nested_domain() recovers
 *          the containing structure from a pointer to it
 * @vsmmu:  the vSMMU this domain was allocated from; its s2_parent supplies
 *          the stage 2 used when building the physical STE
 * @ste:    the first two little-endian words of the vSTE supplied by
 *          userspace, as kept after validation
 */
struct arm_smmu_nested_domain {
	struct iommu_domain domain;
	struct arm_vsmmu *vsmmu;

	__le64 ste[2];
};

/* The following are exposed for testing purposes. */
struct arm_smmu_entry_writer_ops;
struct arm_smmu_entry_writer {
@@ -863,6 +882,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
	return container_of(dom, struct arm_smmu_domain, domain);
}

/*
 * Convert a core iommu_domain pointer into the arm_smmu_nested_domain that
 * embeds it. The caller must know that @dom really is a nested domain.
 */
static inline struct arm_smmu_nested_domain *
to_smmu_nested_domain(struct iommu_domain *dom)
{
	struct arm_smmu_nested_domain *nested_domain;

	nested_domain = container_of(dom, struct arm_smmu_nested_domain, domain);
	return nested_domain;
}

extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;

@@ -909,6 +934,7 @@ struct arm_smmu_attach_state {
	struct iommu_domain *old_domain;
	struct arm_smmu_master *master;
	bool cd_needs_ats;
	bool disable_ats;
	ioasid_t ssid;
	/* Resulting state */
	bool ats_enabled;
+20 −0
Original line number Diff line number Diff line
@@ -421,6 +421,26 @@ struct iommu_hwpt_vtd_s1 {
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
 *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
 *
 * @ste: The first two double words of the user space Stream Table Entry for
 *       the translation. Must be little-endian.
 *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
 *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
 *       - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
 *
 * -EIO will be returned if @ste is not legal or contains any non-allowed field.
 * Cfg can be used to select an S1, Bypass or Abort configuration. A Bypass
 * nested domain will translate the same as the nesting parent. The S1 will
 * install a Context Descriptor Table pointing at userspace memory translated
 * by the nesting parent.
 *
 * If V is not set, the remaining bits of @ste are ignored and the nested
 * domain behaves like an Abort configuration.
 */
struct iommu_hwpt_arm_smmuv3 {
	__aligned_le64 ste[2];
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data