Commit 1f021341 authored by Keith Busch's avatar Keith Busch
Browse files

nvme-multipath: defer partition scanning



We need to suppress the partition scan from occurring within the
controller's scan_work context. If a path error occurs here, the IO will
wait until a path becomes available or all paths are torn down, but that
action also occurs within scan_work, so it would deadlock. Defer the
partition scan to a different context that does not block scan_work.

Reported-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
parent 0ce96a67
Loading
Loading
Loading
Loading
+33 −6
Original line number Diff line number Diff line
@@ -579,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
	return ret;
}

static void nvme_partition_scan_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, partition_scan_work);

	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
					     &head->disk->state)))
		return;

	mutex_lock(&head->disk->open_mutex);
	bdev_disk_changed(head->disk, false);
	mutex_unlock(&head->disk->open_mutex);
}

static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
@@ -605,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);
	INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);

	/*
	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -628,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
		return PTR_ERR(head->disk);
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;

	/*
	 * We need to suppress the partition scan from occurring within the
	 * controller's scan_work context. If a path error occurs here, the IO
	 * will wait until a path becomes available or all paths are torn down,
	 * but that action also occurs within scan_work, so it would deadlock.
	 * Defer the partition scan to a different context that does not block
	 * scan_work.
	 */
	set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;
@@ -654,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
			return;
		}
		nvme_add_ns_head_cdev(head);
		kblockd_schedule_work(&head->partition_scan_work);
	}

	mutex_lock(&head->lock);
@@ -973,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
		return;
	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		nvme_cdev_del(&head->cdev, &head->cdev_device);
		del_gendisk(head->disk);
	}
		/*
		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
		 * to allow multipath to fail all I/O.
		 */
		synchronize_srcu(&head->srcu);
		kblockd_schedule_work(&head->requeue_work);
		del_gendisk(head->disk);
	}
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -990,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	flush_work(&head->partition_scan_work);
	put_disk(head->disk);
}

+1 −0
Original line number Diff line number Diff line
@@ -494,6 +494,7 @@ struct nvme_ns_head {
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
	struct work_struct	partition_scan_work;
	struct mutex		lock;
	unsigned long		flags;
#define NVME_NSHEAD_DISK_LIVE	0