Commit c6e56cf6 authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: move integrity information into queue_limits



Move the integrity information into the queue limits so that it can be
set atomically with other queue limits, and that the sysfs changes to
the read_verify and write_generate flags are properly synchronized.
This also makes it possible to provide a more useful helper to stack the integrity
fields, although it still is separate from the main stacking function
as not all stackable devices want to inherit the integrity settings.
Even with that it greatly simplifies the code in md and dm.

Note that the integrity field is moved as-is into the queue limits.
While there are good arguments for removing the separate blk_integrity
structure, this would cause a lot of churn and might better be done at a
later time if desired.  However the integrity field in the queue_limits
structure is now unconditional so that various ifdefs can be avoided or
replaced with IS_ENABLED().  Given the tiny size of it, that seems like
a worthwhile trade-off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 9f4aa46f
Loading
Loading
Loading
Loading
+3 −46
Original line number Diff line number Diff line
@@ -153,18 +153,11 @@ bio_free() will automatically free the bip.
4.2 Block Device
----------------

Because the format of the protection data is tied to the physical
disk, each block device has been extended with a block integrity
profile (struct blk_integrity).  This optional profile is registered
with the block layer using blk_integrity_register().

The profile contains callback functions for generating and verifying
the protection data, as well as getting and setting application tags.
The profile also contains a few constants to aid in completing,
merging and splitting the integrity metadata.
Block devices can set up the integrity information in the integrity
sub-structure of the queue_limits structure.

Layered block devices will need to pick a profile that's appropriate
for all subdevices.  blk_integrity_compare() can help with that.  DM
for all subdevices.  queue_limits_stack_integrity() can help with that.  DM
and MD linear, RAID0 and RAID1 are currently supported.  RAID4/5/6
will require extra work due to the application tag.

@@ -250,42 +243,6 @@ will require extra work due to the application tag.
      integrity upon completion.


5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
--------------------------------------------------------------------------

    To enable integrity exchange on a block device the gendisk must be
    registered as capable:

    `int blk_integrity_register(gendisk, blk_integrity);`

      The blk_integrity struct is a template and should contain the
      following::

        static struct blk_integrity my_profile = {
            .name                   = "STANDARDSBODY-TYPE-VARIANT-CSUM",
            .generate_fn            = my_generate_fn,
	    .verify_fn              = my_verify_fn,
	    .tuple_size             = sizeof(struct my_tuple_size),
	    .tag_size               = <tag bytes per hw sector>,
        };

      'name' is a text string which will be visible in sysfs.  This is
      part of the userland API so choose it carefully and never change
      it.  The format is standards body-type-variant.
      E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.

      'generate_fn' generates appropriate integrity metadata (for WRITE).

      'verify_fn' verifies that the data buffer matches the integrity
      metadata.

      'tuple_size' must be set to match the size of the integrity
      metadata per sector.  I.e. 8 for DIF and EPP.

      'tag_size' must be set to identify how many bytes of tag space
      are available per hardware sector.  For DIF this is either 2 or
      0 depending on the value of the Control Mode Page ATO bit.

----------------------------------------------------------------------

2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
+13 −111
Original line number Diff line number Diff line
@@ -107,63 +107,6 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);

/**
 * blk_integrity_compare - Check two disks for matching integrity profiles
 * @gd1:	First disk
 * @gd2:	Second disk
 *
 * Description: Stacking drivers such as DM and MD have to make sure that
 * every component device uses the same integrity format before they
 * advertise integrity support to the layers above.  Returns 0 when the
 * profiles of @gd1 and @gd2 are compatible (which includes the case where
 * neither disk has a profile) and -1 otherwise.  A mismatch between two
 * existing profiles is reported through pr_err().
 */
int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
{
	struct blk_integrity *lhs = &gd1->queue->integrity;
	struct blk_integrity *rhs = &gd2->queue->integrity;
	bool lhs_has_pi = lhs->tuple_size != 0;
	bool rhs_has_pi = rhs->tuple_size != 0;

	/* Exactly one disk carries integrity metadata: they cannot match. */
	if (lhs_has_pi != rhs_has_pi)
		return -1;

	/* Neither disk carries integrity metadata: nothing to compare. */
	if (!lhs_has_pi)
		return 0;

	if (lhs->interval_exp != rhs->interval_exp) {
		pr_err("%s: %s/%s protection interval %u != %u\n",
		       __func__, gd1->disk_name, gd2->disk_name,
		       1 << lhs->interval_exp, 1 << rhs->interval_exp);
		return -1;
	}

	if (lhs->tuple_size != rhs->tuple_size) {
		pr_err("%s: %s/%s tuple sz %u != %u\n", __func__,
		       gd1->disk_name, gd2->disk_name,
		       lhs->tuple_size, rhs->tuple_size);
		return -1;
	}

	/* A tag size of zero on either side is not treated as a conflict. */
	if (lhs->tag_size && rhs->tag_size &&
	    lhs->tag_size != rhs->tag_size) {
		pr_err("%s: %s/%s tag sz %u != %u\n", __func__,
		       gd1->disk_name, gd2->disk_name,
		       lhs->tag_size, rhs->tag_size);
		return -1;
	}

	/* Both the checksum type and the ref tag flag have to agree. */
	if (lhs->csum_type != rhs->csum_type ||
	    ((lhs->flags ^ rhs->flags) & BLK_INTEGRITY_REF_TAG)) {
		pr_err("%s: %s/%s type %s != %s\n", __func__,
		       gd1->disk_name, gd2->disk_name,
		       blk_integrity_profile_name(lhs),
		       blk_integrity_profile_name(rhs));
		return -1;
	}

	return 0;
}
EXPORT_SYMBOL(blk_integrity_compare);

bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
			    struct request *next)
{
@@ -217,7 +160,7 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,

static inline struct blk_integrity *dev_to_bi(struct device *dev)
{
	return &dev_to_disk(dev)->queue->integrity;
	return &dev_to_disk(dev)->queue->limits.integrity;
}

const char *blk_integrity_profile_name(struct blk_integrity *bi)
@@ -246,7 +189,8 @@ EXPORT_SYMBOL_GPL(blk_integrity_profile_name);
static ssize_t flag_store(struct device *dev, struct device_attribute *attr,
		const char *page, size_t count, unsigned char flag)
{
	struct blk_integrity *bi = dev_to_bi(dev);
	struct request_queue *q = dev_to_disk(dev)->queue;
	struct queue_limits lim;
	unsigned long val;
	int err;

@@ -254,11 +198,18 @@ static ssize_t flag_store(struct device *dev, struct device_attribute *attr,
	if (err)
		return err;

	/* the flags are inverted vs the values in the sysfs files */
	/* note that the flags are inverted vs the values in the sysfs files */
	lim = queue_limits_start_update(q);
	if (val)
		bi->flags &= ~flag;
		lim.integrity.flags &= ~flag;
	else
		bi->flags |= flag;
		lim.integrity.flags |= flag;

	blk_mq_freeze_queue(q);
	err = queue_limits_commit_update(q, &lim);
	blk_mq_unfreeze_queue(q);
	if (err)
		return err;
	return count;
}

@@ -355,52 +306,3 @@ const struct attribute_group blk_integrity_attr_group = {
	.name = "integrity",
	.attrs = integrity_attrs,
};

/**
 * blk_integrity_register - Mark a gendisk as integrity-capable
 * @disk:	gendisk to make integrity-aware
 * @template:	integrity profile whose fields are copied into the disk's queue
 *
 * Description: A device that wants to advertise that it can send/receive
 * integrity metadata calls this function to register that capability with
 * the block layer.  The fields of @template are copied into the integrity
 * profile of the disk's request queue; a zero interval_exp in @template is
 * replaced by one derived from the queue's logical block size.  See
 * Documentation/block/data-integrity.rst.
 */
void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
{
	struct blk_integrity *bi = &disk->queue->integrity;

	bi->csum_type = template->csum_type;
	bi->flags = template->flags;
	bi->tuple_size = template->tuple_size;
	bi->tag_size = template->tag_size;
	bi->pi_offset = template->pi_offset;

	/* An unset interval falls back to the queue's logical block size. */
	if (template->interval_exp)
		bi->interval_exp = template->interval_exp;
	else
		bi->interval_exp =
			ilog2(queue_logical_block_size(disk->queue));

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	/* Integrity wins: drop the crypto profile if one was set up. */
	if (disk->queue->crypto_profile) {
		pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
		disk->queue->crypto_profile = NULL;
	}
#endif
}
EXPORT_SYMBOL(blk_integrity_register);

/**
 * blk_integrity_unregister - Drop the integrity profile of a disk
 * @disk:	disk whose integrity profile is to be removed
 *
 * Description: Clears the integrity profile stored in the request queue
 * of @disk.  Nothing is done when the profile has a zero tuple size.
 */
void blk_integrity_unregister(struct gendisk *disk)
{
	struct blk_integrity *profile = &disk->queue->integrity;

	/* Only a profile with a non-zero tuple size needs to be wiped. */
	if (profile->tuple_size)
		memset(profile, 0, sizeof(*profile));
}
EXPORT_SYMBOL(blk_integrity_unregister);
+112 −6
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/pagemap.h>
#include <linux/backing-dev-defs.h>
#include <linux/gcd.h>
@@ -97,6 +97,36 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
	return 0;
}

/*
 * Sanity check the integrity profile embedded in @lim.
 *
 * Without a tuple size no checksum type, tag size or ref tag flag may be
 * set.  With a tuple size, integrity support must be built in and a ref tag
 * requires a checksum.  An unset protection interval is defaulted from the
 * logical block size.  Returns 0 on success or -EINVAL for invalid settings.
 */
static int blk_validate_integrity_limits(struct queue_limits *lim)
{
	struct blk_integrity *bi = &lim->integrity;
	bool has_csum = bi->csum_type != BLK_INTEGRITY_CSUM_NONE;
	bool has_ref_tag = (bi->flags & BLK_INTEGRITY_REF_TAG) != 0;

	if (!bi->tuple_size) {
		/* No metadata at all: every PI related field must be unset. */
		if (!has_csum && !bi->tag_size && !has_ref_tag)
			return 0;
		pr_warn("invalid PI settings.\n");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
		pr_warn("integrity support disabled.\n");
		return -EINVAL;
	}

	if (has_ref_tag && !has_csum) {
		pr_warn("ref tag not support without checksum.\n");
		return -EINVAL;
	}

	/* Default the protection interval to one logical block. */
	if (!bi->interval_exp)
		bi->interval_exp = ilog2(lim->logical_block_size);

	return 0;
}

/*
 * Check that the limits in lim are valid, initialize defaults for unset
 * values, and cap values based on others where needed.
@@ -105,6 +135,7 @@ static int blk_validate_limits(struct queue_limits *lim)
{
	unsigned int max_hw_sectors;
	unsigned int logical_block_sectors;
	int err;

	/*
	 * Unless otherwise specified, default to 512 byte logical blocks and a
@@ -230,6 +261,9 @@ static int blk_validate_limits(struct queue_limits *lim)
		lim->misaligned = 0;
	}

	err = blk_validate_integrity_limits(lim);
	if (err)
		return err;
	return blk_validate_zoned_limits(lim);
}

@@ -263,13 +297,24 @@ int queue_limits_commit_update(struct request_queue *q,
		struct queue_limits *lim)
	__releases(q->limits_lock)
{
	int error = blk_validate_limits(lim);
	int error;

	error = blk_validate_limits(lim);
	if (error)
		goto out_unlock;

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	if (q->crypto_profile && lim->integrity.tag_size) {
		pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n");
		error = -EINVAL;
		goto out_unlock;
	}
#endif

	if (!error) {
	q->limits = *lim;
	if (q->disk)
		blk_apply_bdi_limits(q->disk->bdi, lim);
	}
out_unlock:
	mutex_unlock(&q->limits_lock);
	return error;
}
@@ -575,6 +620,67 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
}
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);

/**
 * queue_limits_stack_integrity - stack integrity profile
 * @t: target queue limits
 * @b: base queue limits
 *
 * Try to stack the integrity profile of @b into the target @t.  This
 * succeeds if either:
 *
 *   a) @t does not have any integrity information stacked into it yet, in
 *      which case the settings of @b are inherited, or
 *   b) the integrity profile in @b is identical to the one in @t
 *
 * Returns %true if @b could be stacked into @t.  Otherwise the integrity
 * information in @t is cleared and %false is returned.  Always returns
 * %true without touching @t when integrity support is not built in.
 */
bool queue_limits_stack_integrity(struct queue_limits *t,
		struct queue_limits *b)
{
	struct blk_integrity *ti = &t->integrity;
	struct blk_integrity *bi = &b->integrity;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
		return true;

	if (!ti->tuple_size && !(ti->flags & BLK_INTEGRITY_STACKED)) {
		/* inherit the settings from the first underlying device */
		ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE |
			(bi->flags & BLK_INTEGRITY_REF_TAG);
		ti->csum_type = bi->csum_type;
		ti->tuple_size = bi->tuple_size;
		ti->pi_offset = bi->pi_offset;
		ti->interval_exp = bi->interval_exp;
		ti->tag_size = bi->tag_size;
	} else if (ti->tuple_size || bi->tuple_size) {
		/*
		 * Something was stacked before and at least one side carries
		 * integrity metadata: the profiles have to match exactly.
		 */
		if (ti->tuple_size != bi->tuple_size ||
		    ti->interval_exp != bi->interval_exp ||
		    ti->tag_size != bi->tag_size ||
		    ti->csum_type != bi->csum_type ||
		    ((ti->flags ^ bi->flags) & BLK_INTEGRITY_REF_TAG)) {
			memset(ti, 0, sizeof(*ti));
			return false;
		}
	}

	ti->flags |= BLK_INTEGRITY_STACKED;
	return true;
}
EXPORT_SYMBOL_GPL(queue_limits_stack_integrity);

/**
 * blk_queue_update_dma_pad - update pad mask
 * @q:     the request queue for the device
+6 −6
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 */
static void t10_pi_type1_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;
	const int tuple_sz = bi->tuple_size;
	u32 ref_tag = t10_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
@@ -167,7 +167,7 @@ static void t10_pi_type1_prepare(struct request *rq)
 */
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;
	unsigned intervals = nr_bytes >> bi->interval_exp;
	const int tuple_sz = bi->tuple_size;
	u32 ref_tag = t10_pi_ref_tag(rq);
@@ -290,7 +290,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,

static void ext_pi_type1_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;
	const int tuple_sz = bi->tuple_size;
	u64 ref_tag = ext_pi_ref_tag(rq);
	u8 offset = bi->pi_offset;
@@ -330,7 +330,7 @@ static void ext_pi_type1_prepare(struct request *rq)

static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;
	unsigned intervals = nr_bytes >> bi->interval_exp;
	const int tuple_sz = bi->tuple_size;
	u64 ref_tag = ext_pi_ref_tag(rq);
@@ -396,7 +396,7 @@ blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,

void blk_integrity_prepare(struct request *rq)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;

	if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
		return;
@@ -409,7 +409,7 @@ void blk_integrity_prepare(struct request *rq)

void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
{
	struct blk_integrity *bi = &rq->q->integrity;
	struct blk_integrity *bi = &rq->q->limits.integrity;

	if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
		return;
+0 −1
Original line number Diff line number Diff line
@@ -206,7 +206,6 @@ struct dm_table {

	bool integrity_supported:1;
	bool singleton:1;
	unsigned integrity_added:1;

	/*
	 * Indicates the rw permissions for the new logical device.  This
Loading