Commit 335d318e authored by Kent Overstreet
Browse files

bcachefs: bch2_folio_reservation_get_partial() is now better behaved



bch2_folio_reservation_get_partial(), on partial success, will now
return a reservation that's aligned to the filesystem blocksize.

This is a partial fix for fstests generic/299 - fio verify is badly
behaved in the presence of short writes that aren't aligned to its
blocksize.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 81e0b6c7
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -1160,11 +1160,11 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
#define SECTORS_CACHE	1024

int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
			      u64 sectors, int flags)
				u64 sectors, enum bch_reservation_flags flags)
{
	struct bch_fs_pcpu *pcpu;
	u64 old, get;
	s64 sectors_available;
	u64 sectors_available;
	int ret;

	percpu_down_read(&c->mark_lock);
@@ -1202,6 +1202,9 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
	percpu_u64_set(&c->pcpu->sectors_available, 0);
	sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

	if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
		sectors = min(sectors, sectors_available);

	if (sectors <= sectors_available ||
	    (flags & BCH_DISK_RESERVATION_NOFAIL)) {
		atomic64_set(&c->sectors_available,
+7 −5
Original line number Diff line number Diff line
@@ -344,14 +344,16 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
	}
}

#define BCH_DISK_RESERVATION_NOFAIL		(1 << 0)
enum bch_reservation_flags {
	BCH_DISK_RESERVATION_NOFAIL	= 1 << 0,
	BCH_DISK_RESERVATION_PARTIAL	= 1 << 1,
};

int __bch2_disk_reservation_add(struct bch_fs *,
				struct disk_reservation *,
				u64, int);
int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *,
				u64, enum bch_reservation_flags);

static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
					    u64 sectors, int flags)
					    u64 sectors, enum bch_reservation_flags flags)
{
#ifdef __KERNEL__
	u64 old, new;
+45 −25
Original line number Diff line number Diff line
@@ -399,14 +399,17 @@ void bch2_folio_reservation_put(struct bch_fs *c,
	bch2_quota_reservation_put(c, inode, &res->quota);
}

int bch2_folio_reservation_get(struct bch_fs *c,
static int __bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
			size_t offset, size_t len,
			bool partial)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	struct disk_reservation disk_res = {};
	size_t reserved = len;
	int ret;

	if (!s)
@@ -422,48 +425,65 @@ int bch2_folio_reservation_get(struct bch_fs *c,
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
				partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
		if (unlikely(ret))
			return ret;

		if (unlikely(disk_res.sectors != disk_sectors)) {
			disk_sectors = quota_sectors = 0;

			for (i = round_down(offset, block_bytes(c)) >> 9;
			     i < round_up(offset + len, block_bytes(c)) >> 9;
			     i++) {
				disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
				if (disk_sectors > disk_res.sectors) {
					/*
					 * Make sure to get a reservation that's
					 * aligned to the filesystem blocksize:
					 */
					unsigned reserved_offset = round_down(i << 9, block_bytes(c));
					reserved = clamp(reserved_offset, offset, offset + len) - offset;

					if (!reserved) {
						bch2_disk_reservation_put(c, &disk_res);
						return -BCH_ERR_ENOSPC_disk_reservation;
					}
					break;
				}
				quota_sectors += s->s[i].state == SECTOR_unallocated;
			}
		}
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
		if (unlikely(ret)) {
			struct disk_reservation tmp = { .sectors = disk_sectors };
			res->disk.sectors -= disk_sectors;

			bch2_disk_reservation_put(c, &tmp);
			bch2_disk_reservation_put(c, &disk_res);
			return ret;
		}
	}

	return 0;
	res->disk.sectors += disk_res.sectors;
	return partial ? reserved : 0;
}

ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
{
	size_t l, reserved = 0;
	int ret;

	while ((l = len - reserved)) {
		while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
			if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
				return reserved ?: ret;

			len = reserved + l;
			l /= 2;
	return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
}

		offset += l;
		reserved += l;
	}

	return reserved;
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			size_t offset, size_t len)
{
	return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
}

static void bch2_clear_folio_bits(struct folio *folio)