Commit 4f8b6f25 authored by Linus Torvalds
Browse files

Merge tag 'for-6.10/dm-changes' of...

Merge tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Add a dm-crypt optional "high_priority" flag that enables the crypt
   workqueues to use WQ_HIGHPRI.

 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.

 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).

 - Fix dm-crypt's WQ_UNBOUND crypt_queue to no longer set
   WQ_CPU_INTENSIVE, because that flag is meaningless with WQ_UNBOUND.

 - Fix various issues with dm-delay target (including a resource
   teardown fix, a fix for a hung task when using kthread mode, and
   other improvements that followed from code inspection).

* tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-delay: remove timer_lock
  dm-delay: change locking to avoid contention
  dm-delay: fix max_delay calculations
  dm-delay: fix hung task introduced by kthread mode
  dm-delay: fix workqueue delay_timer race
  dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue
  dm: use queue_limits_set
  dm-crypt: stop constraining max_segment_size to PAGE_SIZE
  dm-crypt: export sysfs of all workqueues
  dm-crypt: add the optional "high_priority" flag
parents 113d1dd9 8b21ac87
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -113,6 +113,11 @@ same_cpu_crypt
    The default is to use an unbound workqueue so that encryption work
    is automatically balanced between available CPUs.

high_priority
    Set dm-crypt workqueues and the writer thread to high priority. This
    improves throughput and latency of dm-crypt while degrading general
    responsiveness of the system.

submit_from_crypt_cpus
    Disable offloading writes to a separate thread after encryption.
    There are some situations where offloading write bios from the
+49 −24
Original line number Diff line number Diff line
@@ -47,6 +47,8 @@

#define DM_MSG_PREFIX "crypt"

static DEFINE_IDA(workqueue_ida);

/*
 * context holding the current state of a multi-part conversion
 */
@@ -137,9 +139,9 @@ struct iv_elephant_private {
 * and encrypts / decrypts at the same time.
 */
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
	     DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
	     DM_CRYPT_WRITE_INLINE };
	     DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
	     DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
	     DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };

enum cipher_flags {
	CRYPT_MODE_INTEGRITY_AEAD,	/* Use authenticated mode for cipher */
@@ -184,6 +186,7 @@ struct crypt_config {
		struct crypto_aead **tfms_aead;
	} cipher_tfm;
	unsigned int tfms_count;
	int workqueue_id;
	unsigned long cipher_flags;

	/*
@@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);

/*
 * Generate a new unfragmented bio with the given size
 * This should never violate the device limitations (but only because
 * max_segment_size is being constrained to PAGE_SIZE).
 * This should never violate the device limitations (but if it did then block
 * core should split the bio as needed).
 *
 * This function may be called concurrently. If we allocate from the mempool
 * concurrently, there is a possibility of deadlock. For example, if we have
@@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
	if (cc->crypt_queue)
		destroy_workqueue(cc->crypt_queue);

	if (cc->workqueue_id)
		ida_free(&workqueue_ida, cc->workqueue_id);

	crypt_free_tfms(cc);

	bioset_exit(&cc->bs);
@@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
	struct crypt_config *cc = ti->private;
	struct dm_arg_set as;
	static const struct dm_arg _args[] = {
		{0, 8, "Invalid number of feature args"},
		{0, 9, "Invalid number of feature args"},
	};
	unsigned int opt_params, val;
	const char *opt_string, *sval;
@@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar

		else if (!strcasecmp(opt_string, "same_cpu_crypt"))
			set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
		else if (!strcasecmp(opt_string, "high_priority"))
			set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);

		else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
			set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
@@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct crypt_config *cc;
	const char *devname = dm_table_device_name(ti->table);
	int key_size;
	int key_size, wq_id;
	unsigned int align_mask;
	unsigned int common_wq_flags;
	unsigned long long tmpll;
	int ret;
	size_t iv_size_padding, additional_req_size;
@@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		cc->tag_pool_max_sectors <<= cc->sector_shift;
	}

	wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
	if (wq_id < 0) {
		ti->error = "Couldn't get workqueue id";
		ret = wq_id;
		goto bad;
	}
	cc->workqueue_id = wq_id;

	ret = -ENOMEM;
	cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
	common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
	if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
		common_wq_flags |= WQ_HIGHPRI;

	cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
	if (!cc->io_queue) {
		ti->error = "Couldn't create kcryptd io queue";
		goto bad;
	}

	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
		cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
						  1, devname);
	else
		cc->crypt_queue = alloc_workqueue("kcryptd/%s",
						  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
						  num_online_cpus(), devname);
	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
		cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
						  common_wq_flags | WQ_CPU_INTENSIVE,
						  1, devname, wq_id);
	} else {
		/*
		 * While crypt_queue is certainly CPU intensive, the use of
		 * WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
		 */
		cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
						  common_wq_flags | WQ_UNBOUND,
						  num_online_cpus(), devname, wq_id);
	}
	if (!cc->crypt_queue) {
		ti->error = "Couldn't create kcryptd queue";
		goto bad;
@@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		ti->error = "Couldn't spawn write thread";
		goto bad;
	}
	if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
		set_user_nice(cc->write_thread, MIN_NICE);

	ti->num_flush_bios = 1;
	ti->limit_swap_bios = true;
@@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,

		num_feature_args += !!ti->num_discard_bios;
		num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
		num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
		num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
		num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
		num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
@@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
				DMEMIT(" allow_discards");
			if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
				DMEMIT(" same_cpu_crypt");
			if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
				DMEMIT(" high_priority");
			if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
				DMEMIT(" submit_from_crypt_cpus");
			if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
@@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
		DMEMIT_TARGET_NAME_VERSION(ti->type);
		DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
		DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
		DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
		DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
		       'y' : 'n');
		DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
@@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct crypt_config *cc = ti->private;

	/*
	 * Unfortunate constraint that is required to avoid the potential
	 * for exceeding underlying device's max_segments limits -- due to
	 * crypt_alloc_buffer() possibly allocating pages for the encryption
	 * bio that are not as physically contiguous as the original bio.
	 */
	limits->max_segment_size = PAGE_SIZE;

	limits->logical_block_size =
		max_t(unsigned int, limits->logical_block_size, cc->sector_size);
	limits->physical_block_size =
@@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type crypt_target = {
	.name   = "crypt",
	.version = {1, 25, 0},
	.version = {1, 26, 0},
	.module = THIS_MODULE,
	.ctr    = crypt_ctr,
	.dtr    = crypt_dtr,
+31 −29
Original line number Diff line number Diff line
@@ -28,7 +28,8 @@ struct delay_class {

struct delay_c {
	struct timer_list delay_timer;
	struct mutex timer_lock;
	struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
	spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
	struct workqueue_struct *kdelayd_wq;
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
@@ -49,8 +50,6 @@ struct dm_delay_info {
	unsigned long expires;
};

static DEFINE_MUTEX(delayed_bios_lock);

static void handle_delayed_timer(struct timer_list *t)
{
	struct delay_c *dc = from_timer(dc, t, delay_timer);
@@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)

static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
	mutex_lock(&dc->timer_lock);

	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
		mod_timer(&dc->delay_timer, expires);

	mutex_unlock(&dc->timer_lock);
	timer_reduce(&dc->delay_timer, expires);
}

static inline bool delay_is_fast(struct delay_c *dc)
@@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;
	struct bio_list flush_bio_list;
	LIST_HEAD(local_list);
	unsigned long next_expires = 0;
	bool start_timer = false;
	bio_list_init(&flush_bio_list);

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
	mutex_lock(&dc->process_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	list_replace_init(&dc->delayed_bios, &local_list);
	spin_unlock(&dc->delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &local_list, list) {
		cond_resched();
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
@@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
			}
		}
	}
	mutex_unlock(&delayed_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	list_splice(&local_list, &dc->delayed_bios);
	spin_unlock(&dc->delayed_bios_lock);
	mutex_unlock(&dc->process_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);
@@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)

	while (!kthread_should_stop()) {
		flush_delayed_bios(dc, false);
		mutex_lock(&delayed_bios_lock);
		spin_lock(&dc->delayed_bios_lock);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			set_current_state(TASK_INTERRUPTIBLE);
			mutex_unlock(&delayed_bios_lock);
			spin_unlock(&dc->delayed_bios_lock);
			schedule();
		} else {
			mutex_unlock(&delayed_bios_lock);
			spin_unlock(&dc->delayed_bios_lock);
			cond_resched();
		}
	}
@@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	if (dc->kdelayd_wq)
	if (dc->kdelayd_wq) {
		timer_shutdown_sync(&dc->delay_timer);
		destroy_workqueue(dc->kdelayd_wq);
	}

	if (dc->read.dev)
		dm_put_device(ti, dc->read.dev);
@@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
	if (dc->worker)
		kthread_stop(dc->worker);

	mutex_destroy(&dc->timer_lock);
	mutex_destroy(&dc->process_bios_lock);

	kfree(dc);
}
@@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)

	ti->private = dc;
	INIT_LIST_HEAD(&dc->delayed_bios);
	mutex_init(&dc->timer_lock);
	mutex_init(&dc->process_bios_lock);
	spin_lock_init(&dc->delayed_bios_lock);
	dc->may_delay = true;
	dc->argc = argc;

@@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		ret = delay_class_ctr(ti, &dc->flush, argv);
		if (ret)
			goto bad;
		max_delay = max(max_delay, dc->write.delay);
		max_delay = max(max_delay, dc->flush.delay);
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->write, argv + 3);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->write.delay);

	if (argc == 6) {
		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
		if (ret)
			goto bad;
		max_delay = max(max_delay, dc->flush.delay);
		goto out;
	}

@@ -267,8 +270,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		 * In case of small requested delays, use kthread instead of
		 * timers and workqueue to achieve better latency.
		 */
		dc->worker = kthread_create(&flush_worker_fn, dc,
					    "dm-delay-flush-worker");
		dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
		if (IS_ERR(dc->worker)) {
			ret = PTR_ERR(dc->worker);
			dc->worker = NULL;
@@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
	delayed->context = dc;
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	mutex_lock(&delayed_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	if (unlikely(!dc->may_delay)) {
		mutex_unlock(&delayed_bios_lock);
		spin_unlock(&dc->delayed_bios_lock);
		return DM_MAPIO_REMAPPED;
	}
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	mutex_unlock(&delayed_bios_lock);
	spin_unlock(&dc->delayed_bios_lock);

	if (delay_is_fast(dc))
		wake_up_process(dc->worker);
@@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	mutex_lock(&delayed_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	dc->may_delay = false;
	mutex_unlock(&delayed_bios_lock);
	spin_unlock(&dc->delayed_bios_lock);

	if (!delay_is_fast(dc))
		del_timer_sync(&dc->delay_timer);
		timer_delete(&dc->delay_timer);
	flush_delayed_bios(dc, true);
}

+12 −15
Original line number Diff line number Diff line
@@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	bool wc = false, fua = false;
	int r;

	/*
	 * Copy table's limits to the DM device's request_queue
	 */
	q->limits = *limits;

	if (dm_table_supports_nowait(t))
		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);

	if (!dm_table_supports_discards(t)) {
		q->limits.max_discard_sectors = 0;
		q->limits.max_hw_discard_sectors = 0;
		q->limits.discard_granularity = 0;
		q->limits.discard_alignment = 0;
		q->limits.discard_misaligned = 0;
		limits->max_hw_discard_sectors = 0;
		limits->discard_granularity = 0;
		limits->discard_alignment = 0;
		limits->discard_misaligned = 0;
	}

	if (!dm_table_supports_write_zeroes(t))
		limits->max_write_zeroes_sectors = 0;

	if (!dm_table_supports_secure_erase(t))
		q->limits.max_secure_erase_sectors = 0;
		limits->max_secure_erase_sectors = 0;

	r = queue_limits_set(q, limits);
	if (r)
		return r;

	if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
		wc = true;
@@ -2007,9 +2008,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	else
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);

	if (!dm_table_supports_write_zeroes(t))
		q->limits.max_write_zeroes_sectors = 0;

	dm_table_verify_integrity(t);

	/*
@@ -2048,7 +2046,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	}

	dm_update_crypto_profile(q, t);
	disk_update_readahead(t->md->disk);

	/*
	 * Check for request-based device is left to