Commit ecda4833 authored by Matthew Rosato's avatar Matthew Rosato Committed by Joerg Roedel
Browse files

iommu/s390: Implement blocking domain



This fixes a crash when surprise hot-unplugging a PCI device. This crash
happens because during hot-unplug __iommu_group_set_domain_nofail()
attaching the default domain fails when the platform no longer
recognizes the device as it has already been removed and we end up with
a NULL domain pointer and UAF. This is exactly the case referred to in
the second comment in __iommu_device_set_domain() and just as stated
there if we can instead attach the blocking domain the UAF is prevented
as this can handle the already removed device. Implement the blocking
domain to use this handling.  With this change, the crash is fixed but
we still hit a warning attempting to change DMA ownership on a blocked
device.

Fixes: c76c067e ("s390/pci: Use dma-iommu layer")
Co-developed-by: default avatarNiklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: default avatarNiklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: default avatarMatthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: default avatarNiklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240910211516.137933-1-mjrosato@linux.ibm.com


Signed-off-by: default avatarJoerg Roedel <jroedel@suse.de>
parent 8e929cb5
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -96,7 +96,6 @@ struct zpci_bar_struct {
	u8		size;		/* order 2 exponent */
};

struct s390_domain;
struct kvm_zdev;

#define ZPCI_FUNCTIONS_PER_BUS 256
@@ -181,9 +180,10 @@ struct zpci_dev {
	struct dentry	*debugfs_dev;

	/* IOMMU and passthrough */
	struct s390_domain *s390_domain; /* s390 IOMMU domain data */
	struct iommu_domain *s390_domain; /* attached IOMMU domain */
	struct kvm_zdev *kzdev;
	struct mutex kzdev_lock;
	spinlock_t dom_lock;		/* protect s390_domain change */
};

static inline bool zdev_enabled(struct zpci_dev *zdev)
+3 −0
Original line number Diff line number Diff line
@@ -160,6 +160,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
	struct zpci_iommu_ctrs *ctrs;
	struct zpci_fib fib = {0};
	unsigned long flags;
	u8 cc, status;

	if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@@ -171,6 +172,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
	WARN_ON((u64) zdev->fmb & 0xf);

	/* reset software counters */
	spin_lock_irqsave(&zdev->dom_lock, flags);
	ctrs = zpci_get_iommu_ctrs(zdev);
	if (ctrs) {
		atomic64_set(&ctrs->mapped_pages, 0);
@@ -179,6 +181,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
		atomic64_set(&ctrs->sync_map_rpcits, 0);
		atomic64_set(&ctrs->sync_rpcits, 0);
	}
	spin_unlock_irqrestore(&zdev->dom_lock, flags);


	fib.fmb_addr = virt_to_phys(zdev->fmb);
+8 −2
Original line number Diff line number Diff line
@@ -71,17 +71,23 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,

static void pci_sw_counter_show(struct seq_file *m)
{
	struct zpci_iommu_ctrs  *ctrs = zpci_get_iommu_ctrs(m->private);
	struct zpci_dev *zdev = m->private;
	struct zpci_iommu_ctrs *ctrs;
	atomic64_t *counter;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&zdev->dom_lock, flags);
	ctrs = zpci_get_iommu_ctrs(m->private);
	if (!ctrs)
		return;
		goto unlock;

	counter = &ctrs->mapped_pages;
	for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
		seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
			   atomic64_read(counter));
unlock:
	spin_unlock_irqrestore(&zdev->dom_lock, flags);
}

static int pci_perf_show(struct seq_file *m, void *v)
+46 −27
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@ struct s390_domain {
	struct rcu_head		rcu;
};

static struct iommu_domain blocking_domain;

static inline unsigned int calc_rtx(dma_addr_t ptr)
{
	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -369,20 +371,36 @@ static void s390_domain_free(struct iommu_domain *domain)
	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}

static void s390_iommu_detach_device(struct iommu_domain *domain,
static void zdev_s390_domain_update(struct zpci_dev *zdev,
				    struct iommu_domain *domain)
{
	unsigned long flags;

	spin_lock_irqsave(&zdev->dom_lock, flags);
	zdev->s390_domain = domain;
	spin_unlock_irqrestore(&zdev->dom_lock, flags);
}

static int blocking_domain_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev = to_zpci_dev(dev);
	struct s390_domain *s390_domain;
	unsigned long flags;

	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
		return 0;

	s390_domain = to_s390_domain(zdev->s390_domain);
	spin_lock_irqsave(&s390_domain->list_lock, flags);
	list_del_rcu(&zdev->iommu_list);
	spin_unlock_irqrestore(&s390_domain->list_lock, flags);

	zpci_unregister_ioat(zdev, 0);
	zdev->s390_domain = NULL;
	zdev->dma_table = NULL;
	zdev_s390_domain_update(zdev, domain);

	return 0;
}

static int s390_iommu_attach_device(struct iommu_domain *domain,
@@ -401,20 +419,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
		domain->geometry.aperture_end < zdev->start_dma))
		return -EINVAL;

	if (zdev->s390_domain)
		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
	blocking_domain_attach_device(&blocking_domain, dev);

	/* If we fail now DMA remains blocked via blocking domain */
	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(s390_domain->dma_table), &status);
	/*
	 * If the device is undergoing error recovery the reset code
	 * will re-establish the new domain.
	 */
	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
		return -EIO;

	zdev->dma_table = s390_domain->dma_table;
	zdev->s390_domain = s390_domain;
	zdev_s390_domain_update(zdev, domain);

	spin_lock_irqsave(&s390_domain->list_lock, flags);
	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
@@ -466,19 +479,11 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
	if (zdev->tlb_refresh)
		dev->iommu->shadow_on_flush = 1;

	return &zdev->iommu_dev;
}

static void s390_iommu_release_device(struct device *dev)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);
	/* Start with DMA blocked */
	spin_lock_init(&zdev->dom_lock);
	zdev_s390_domain_update(zdev, &blocking_domain);

	/*
	 * release_device is expected to detach any domain currently attached
	 * to the device, but keep it attached to other devices in the group.
	 */
	if (zdev)
		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
	return &zdev->iommu_dev;
}

static int zpci_refresh_all(struct zpci_dev *zdev)
@@ -697,9 +702,15 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,

struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
	if (!zdev || !zdev->s390_domain)
	struct s390_domain *s390_domain;

	lockdep_assert_held(&zdev->dom_lock);

	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
		return NULL;
	return &zdev->s390_domain->ctrs;

	s390_domain = to_s390_domain(zdev->s390_domain);
	return &s390_domain->ctrs;
}

int zpci_init_iommu(struct zpci_dev *zdev)
@@ -776,11 +787,19 @@ static int __init s390_iommu_init(void)
}
subsys_initcall(s390_iommu_init);

static struct iommu_domain blocking_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev	= blocking_domain_attach_device,
	}
};

static const struct iommu_ops s390_iommu_ops = {
	.blocked_domain		= &blocking_domain,
	.release_domain		= &blocking_domain,
	.capable = s390_iommu_capable,
	.domain_alloc_paging = s390_domain_alloc_paging,
	.probe_device = s390_iommu_probe_device,
	.release_device = s390_iommu_release_device,
	.device_group = generic_device_group,
	.pgsize_bitmap = SZ_4K,
	.get_resv_regions = s390_iommu_get_resv_regions,