Commit be427a88 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull more s390 updates from Vasily Gorbik:

 - Add PCI automatic error recovery.

 - Fix tape driver timer initialization that was broken during the
   timer API cleanup.

 - Fix bogus CPU measurement counters values on CPUs offlining.

 - Check the validity of the subchannel before reading other fields in
   the schib in cio code.

* tag 's390-5.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/cio: check the subchannel validity for dev_busid
  s390/cpumf: cpum_cf PMU displays invalid value after hotplug remove
  s390/tape: fix timer initialization in tape_std_assign()
  s390/pci: implement minimal PCI error recovery
  PCI: Export pci_dev_lock()
  s390/pci: implement reset_slot for hotplug slot
  s390/pci: refresh function handle in iomap
parents b89f311d a4751f15
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -210,9 +210,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev);
void zpci_device_reserved(struct zpci_dev *zdev);
bool zpci_is_device_configured(struct zpci_dev *zdev);

int zpci_hot_reset_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
void zpci_update_fh(struct zpci_dev *zdev, u32 fh);

/* CLP */
int clp_setup_writeback_mio(void);
@@ -294,8 +296,10 @@ void zpci_debug_exit(void);
void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);

/* Error reporting */
/* Error handling */
int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
int zpci_clear_error_state(struct zpci_dev *zdev);
int zpci_reset_load_store_blocked(struct zpci_dev *zdev);

#ifdef CONFIG_NUMA

+3 −1
Original line number Diff line number Diff line
@@ -687,8 +687,10 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
						      false);
			if (cfdiag_diffctr(cpuhw, event->hw.config_base))
				cfdiag_push_sample(event, cpuhw);
		} else
		} else if (cpuhw->flags & PMU_F_RESERVED) {
			/* Only update when PMU not hotplugged off */
			hw_perf_event_update(event);
		}
		hwc->state |= PERF_HES_UPTODATE;
	}
}
+144 −4
Original line number Diff line number Diff line
@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
	spin_unlock(&zpci_iomap_lock);
}

/*
 * zpci_do_update_iomap_fh - store a new function handle in the iomap entries
 * @zdev: zPCI function whose BARs are walked
 * @fh: function handle to store
 *
 * Walks all standard BARs of @zdev under zpci_iomap_lock. BARs with a zero
 * size and BARs whose iomap entry has a zero count are skipped; every other
 * entry gets @fh written to its fh field.
 */
static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
{
	int bar, idx;

	spin_lock(&zpci_iomap_lock);
	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
		if (!zdev->bars[bar].size)
			continue;
		idx = zdev->bars[bar].map_idx;
		if (!zpci_iomap_start[idx].count)
			continue;
		/*
		 * Store the handle that was passed in rather than re-reading
		 * zdev->fh, so the function does what its signature says.
		 * WRITE_ONCE() because the entry is read via READ_ONCE() by
		 * the load/store helpers.
		 */
		WRITE_ONCE(zpci_iomap_start[idx].fh, fh);
	}
	spin_unlock(&zpci_iomap_lock);
}

/*
 * zpci_update_fh - record a new function handle for a zPCI function
 * @zdev: zPCI function to update
 * @fh: new function handle
 *
 * A zero @fh or one equal to the currently stored handle is ignored.
 * Otherwise the handle is stored in @zdev. If zpci_use_mio() is false and
 * the function has resources and is enabled, the handle is also written
 * to the function's iomap entries.
 */
void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
{
	if (fh && zdev->fh != fh) {
		zdev->fh = fh;
		/* zdev_enabled() is evaluated after the new handle is stored */
		if (!zpci_use_mio(zdev) && zdev->has_resources &&
		    zdev_enabled(zdev))
			zpci_do_update_iomap_fh(zdev, fh);
	}
}

static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
				    unsigned long size, unsigned long flags)
{
@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
		rc = -EIO;
	else
		zdev->fh = fh;
		zpci_update_fh(zdev, fh);
	return rc;
}

@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev)

	cc = clp_disable_fh(zdev, &fh);
	if (!cc) {
		zdev->fh = fh;
		zpci_update_fh(zdev, fh);
	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
			zdev->fid);
		/* Function is already disabled - update handle */
		rc = clp_refresh_fh(zdev->fid, &fh);
		if (!rc) {
			zdev->fh = fh;
			zpci_update_fh(zdev, fh);
			rc = -EINVAL;
		}
	} else {
@@ -695,6 +723,65 @@ int zpci_disable_device(struct zpci_dev *zdev)
	return rc;
}

/**
 * zpci_hot_reset_device - perform a reset of the given zPCI function
 * @zdev: the slot which should be reset
 *
 * Performs a low level reset of the zPCI function, i.e. a reset that does
 * not detach the function from the common PCI subsystem. The reset may be
 * done while the function is under control of either the DMA or the IOMMU
 * API; in that case the existing DMA/IOMMU translation table is reinstated
 * once the reset has completed.
 *
 * After the reset the function's internal state is equivalent to its state
 * during boot when a driver is first probed. Consequently the PCI function
 * requires re-initialization via the common PCI code afterwards, including
 * re-enabling IRQs via pci_alloc_irq_vectors() and enabling the function
 * via e.g. pci_enable_device_flags(). The caller must guard against
 * concurrent reset attempts.
 *
 * In most cases this function should not be called directly but through
 * pci_reset_function() or pci_reset_bus() which handle the save/restore and
 * locking.
 *
 * Return: 0 on success and an error value otherwise
 */
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
	int rc;

	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
	if (zdev_enabled(zdev)) {
		/* Disables device access, DMAs and IRQs (reset state) */
		rc = zpci_disable_device(zdev);
		/*
		 * Due to a z/VM vs LPAR inconsistency in the error state the
		 * FH may indicate an enabled device while disable reports
		 * that the device is already disabled (-EINVAL). That is not
		 * treated as an error here; anything else is.
		 */
		if (rc && rc != -EINVAL)
			return rc;
	}

	rc = zpci_enable_device(zdev);
	if (rc)
		return rc;

	/* Reinstate an existing translation table, else set up DMA anew */
	if (zdev->dma_table)
		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
					(u64)zdev->dma_table);
	else
		rc = zpci_dma_init_device(zdev);
	if (!rc)
		return 0;

	/* DMA setup failed: leave the function disabled again */
	zpci_disable_device(zdev);
	return rc;
}

/**
 * zpci_create_device() - Create a new zpci_dev and add it to the zbus
 * @fid: Function ID of the device to be created
@@ -776,7 +863,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
{
	int rc;

	zdev->fh = fh;
	zpci_update_fh(zdev, fh);
	/* the PCI function will be scanned once function 0 appears */
	if (!zdev->zbus->bus)
		return 0;
@@ -903,6 +990,59 @@ int zpci_report_error(struct pci_dev *pdev,
}
EXPORT_SYMBOL(zpci_report_error);

/**
 * zpci_clear_error_state() - Clears the zPCI error state of the device
 * @zdev: The zdev for which the zPCI error state should be reset
 *
 * Issues a ZPCI_MOD_FC_RESET_ERROR request for the function handle of @zdev.
 * If clearing the zPCI error state fails the device is left in the error
 * state; the caller may then want to report a permanent failure for the
 * associated pdev if one exists.
 *
 * Returns: 0 on success, -EIO otherwise
 */
int zpci_clear_error_state(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u8 status;
	u64 req;
	int cc;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
	cc = zpci_mod_fc(req, &fib, &status);
	if (!cc)
		return 0;

	/* Any non-zero condition code is logged and mapped to -EIO */
	zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
	return -EIO;
}

/**
 * zpci_reset_load_store_blocked() - Re-enables L/S from error state
 * @zdev: The zdev for which to unblock load/store access
 *
 * Issues a ZPCI_MOD_FC_RESET_BLOCK request for the function handle of @zdev.
 * This re-enables load/store access for a PCI function in the error state
 * while keeping DMA blocked. In this state drivers can poke MMIO space to
 * determine if error recovery is possible while any rogue DMA access from
 * the device is still caught.
 *
 * Returns: 0 on success, -EIO otherwise
 */
int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
{
	struct zpci_fib fib = {0};
	u8 status;
	u64 req;
	int cc;

	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
	cc = zpci_mod_fc(req, &fib, &status);
	if (!cc)
		return 0;

	/* Any non-zero condition code is logged and mapped to -EIO */
	zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
	return -EIO;
}

static int zpci_mem_init(void)
{
	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+224 −6
Original line number Diff line number Diff line
@@ -47,18 +47,223 @@ struct zpci_ccdf_avail {
	u16 pec;			/* PCI event code */
} __packed;

/*
 * ers_result_indicates_abort - check whether a recovery result ends recovery
 * @ers_res: result reported by a recovery step
 *
 * Only CAN_RECOVER, RECOVERED and NEED_RESET allow recovery to continue;
 * every other value is treated as an abort.
 */
static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
{
	return ers_res != PCI_ERS_RESULT_CAN_RECOVER &&
	       ers_res != PCI_ERS_RESULT_RECOVERED &&
	       ers_res != PCI_ERS_RESULT_NEED_RESET;
}

/*
 * is_passed_through - report whether @zdev has an s390_domain set
 * @zdev: zPCI function to check
 *
 * Callers in this file treat a function with an s390_domain as a
 * pass-through device.
 */
static bool is_passed_through(struct zpci_dev *zdev)
{
	if (zdev->s390_domain)
		return true;
	return false;
}

/*
 * is_driver_supported - check whether a driver can take part in recovery
 * @driver: driver bound to the function, may be NULL
 *
 * A driver is supported only if it has an err_handler that provides the
 * error_detected, slot_reset and resume callbacks.
 */
static bool is_driver_supported(struct pci_driver *driver)
{
	return driver && driver->err_handler &&
	       driver->err_handler->error_detected &&
	       driver->err_handler->slot_reset &&
	       driver->err_handler->resume;
}

/*
 * zpci_event_notify_error_detected - report the error to the driver
 * @pdev: PCI function in the error state
 * @driver: driver bound to @pdev; its error_detected callback is invoked
 *
 * Calls the driver's error_detected callback with the current error state
 * of @pdev, logs the outcome and hands the driver's verdict back unchanged.
 */
static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
							 struct pci_driver *driver)
{
	pci_ers_result_t verdict;

	verdict = driver->err_handler->error_detected(pdev, pdev->error_state);
	if (!ers_result_indicates_abort(verdict)) {
		if (verdict == PCI_ERS_RESULT_NEED_RESET)
			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
		return verdict;
	}

	pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
	return verdict;
}

/*
 * zpci_event_do_error_state_clear - unblock MMIO, consult driver, unblock DMA
 * @pdev: PCI function in the error state
 * @driver: driver bound to @pdev
 *
 * First re-enables load/store access, then gives the driver's optional
 * mmio_enabled callback a chance to examine the device, and finally clears
 * the error state so DMA is unblocked as well. If either unblocking step
 * fails, PCI_ERS_RESULT_NEED_RESET is returned so that a full reset is
 * attempted instead.
 */
static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
							struct pci_driver *driver)
{
	struct zpci_dev *zdev = to_zpci(pdev);
	/*
	 * Stays DISCONNECT when the driver has no mmio_enabled callback;
	 * that value is then what gets returned after DMA is unblocked.
	 */
	pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;

	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
	if (zpci_reset_load_store_blocked(zdev)) {
		pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
		/* Fall back to a full reset */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	if (driver->err_handler->mmio_enabled) {
		result = driver->err_handler->mmio_enabled(pdev);
		if (ers_result_indicates_abort(result)) {
			pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
				pci_name(pdev));
			return result;
		}
		if (result == PCI_ERS_RESULT_NEED_RESET) {
			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
			return result;
		}
	}

	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
	if (zpci_clear_error_state(zdev)) {
		pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
		/* Fall back to a full reset */
		return PCI_ERS_RESULT_NEED_RESET;
	}
	pdev->error_state = pci_channel_io_normal;

	return result;
}

/*
 * zpci_event_do_reset - reset the function and notify the driver
 * @pdev: PCI function to reset
 * @driver: driver bound to @pdev; its slot_reset callback is invoked
 *
 * Performs a hot reset of the function. If the reset request fails,
 * PCI_ERS_RESULT_DISCONNECT is returned. Otherwise the error state of
 * @pdev is set back to normal and the result of the driver's slot_reset
 * callback is returned.
 */
static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
					    struct pci_driver *driver)
{
	pci_ers_result_t result;

	pr_info("%s: Initiating reset\n", pci_name(pdev));
	if (zpci_hot_reset_device(to_zpci(pdev))) {
		pr_err("%s: The reset request failed\n", pci_name(pdev));
		return PCI_ERS_RESULT_DISCONNECT;
	}

	pdev->error_state = pci_channel_io_normal;
	result = driver->err_handler->slot_reset(pdev);
	if (ers_result_indicates_abort(result))
		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));

	return result;
}

/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
 * @pdev: PCI function to recover currently in the error state
 *
 * Follows the scheme outlined in Documentation/PCI/pci-error-recovery.rst,
 * simplified in that recovery always happens per function and the platform
 * determines which functions are affected for multi-function devices.
 *
 * The steps are: report the error to the driver, optionally clear the
 * error state without a reset, optionally reset the function, and finally
 * call the driver's resume callback if the outcome is RECOVERED.
 *
 * Returns the final recovery result; anything other than
 * PCI_ERS_RESULT_RECOVERED means the function was not recovered.
 */
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
	pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
	struct pci_driver *drv;

	/*
	 * Hold the device lock so the PCI function is not removed
	 * concurrently, no driver is unbound or probed, and userspace
	 * can't access its configuration space during recovery.
	 */
	pci_dev_lock(pdev);

	/* A permanently failed function is not recovered: DISCONNECT */
	if (pdev->error_state == pci_channel_io_perm_failure)
		goto unlock;
	pdev->error_state = pci_channel_io_frozen;

	if (is_passed_through(to_zpci(pdev))) {
		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
			pci_name(pdev));
		goto unlock;
	}

	drv = to_pci_driver(pdev->dev.driver);
	if (!drv) {
		pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
			pci_name(pdev));
		goto unlock;
	}
	if (!is_driver_supported(drv)) {
		pr_info("%s: The %s driver bound to the device does not support error recovery\n",
			pci_name(pdev),
			drv->name);
		goto unlock;
	}

	result = zpci_event_notify_error_detected(pdev, drv);
	if (ers_result_indicates_abort(result))
		goto unlock;

	/* Try the lightweight path first; it may escalate to NEED_RESET */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		result = zpci_event_do_error_state_clear(pdev, drv);
		if (ers_result_indicates_abort(result))
			goto unlock;
	}

	if (result == PCI_ERS_RESULT_NEED_RESET)
		result = zpci_event_do_reset(pdev, drv);

	if (result == PCI_ERS_RESULT_RECOVERED) {
		pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
		if (drv->err_handler->resume)
			drv->err_handler->resume(pdev);
	} else {
		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
		       pci_name(pdev));
	}
unlock:
	pci_dev_unlock(pdev);

	return result;
}

/* zpci_event_io_failure - Report PCI channel failure state to driver
 * @pdev: PCI function for which to report
 * @es: PCI channel failure state to report
 *
 * Stores @es as the error state of @pdev under the device lock and, unless
 * the function is passed through, invokes the bound driver's error_detected
 * callback if there is one.
 */
static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
{
	struct pci_driver *drv;

	pci_dev_lock(pdev);
	pdev->error_state = es;
	/*
	 * Pass-through functions are not notified: vfio-pci's
	 * error_detected callback notifies user-space, and QEMU reacts to
	 * that by freezing the guest. In an s390 environment PCI errors are
	 * rarely fatal so this is overkill. Instead, in the future the error
	 * event will be injected and the guest will recover the device
	 * itself.
	 */
	if (!is_passed_through(to_zpci(pdev))) {
		drv = to_pci_driver(pdev->dev.driver);
		if (drv && drv->err_handler && drv->err_handler->error_detected)
			drv->err_handler->error_detected(pdev, pdev->error_state);
	}
	pci_dev_unlock(pdev);
}

static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
	struct pci_dev *pdev = NULL;
	pci_ers_result_t ers_res;

	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
		 ccdf->fid, ccdf->fh, ccdf->pec);
	zpci_err("error CCDF:\n");
	zpci_err_hex(ccdf, sizeof(*ccdf));

	if (zdev)
	if (zdev) {
		zpci_update_fh(zdev, ccdf->fh);
		if (zdev->zbus->bus)
			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
	}

	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -66,7 +271,20 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
	if (!pdev)
		return;

	pdev->error_state = pci_channel_io_perm_failure;
	switch (ccdf->pec) {
	case 0x003a: /* Service Action or Error Recovery Successful */
		ers_res = zpci_event_attempt_error_recovery(pdev);
		if (ers_res != PCI_ERS_RESULT_RECOVERED)
			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
		break;
	default:
		/*
		 * Mark as frozen not permanently failed because the device
		 * could be subsequently recovered by the platform.
		 */
		zpci_event_io_failure(pdev, pci_channel_io_frozen);
		break;
	}
	pci_dev_put(pdev);
}

@@ -78,7 +296,7 @@ void zpci_event_error(void *data)

static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	zdev->fh = fh;
	zpci_update_fh(zdev, fh);
	/* Give the driver a hint that the function is
	 * already unusable.
	 */
@@ -121,7 +339,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
		if (!zdev)
			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
		else
			zdev->fh = ccdf->fh;
			zpci_update_fh(zdev, ccdf->fh);
		break;
	case 0x0303: /* Deconfiguration requested */
		if (zdev) {
@@ -130,7 +348,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
			 */
			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
				break;
			zdev->fh = ccdf->fh;
			zpci_update_fh(zdev, ccdf->fh);
			zpci_deconfigure_device(zdev);
		}
		break;
+2 −2
Original line number Diff line number Diff line
@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
			       unsigned long len)
{
	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);

	return __zpci_load(data, req, ZPCI_OFFSET(addr));
}
@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
				unsigned long len)
{
	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);

	return __zpci_store(data, req, ZPCI_OFFSET(addr));
}
Loading