Merge tag 'bcachefs-2024-08-21' of https://github.com/koverstreet/bcachefs (a4c76312) · Commits · git / linux-net

fs/bcachefs/data_update.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -337,6 +337,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
		printbuf_exit(&buf);

		bch2_fatal_error(c);
		ret = -EIO;
		goto out;
		}

fs/bcachefs/errcode.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -257,7 +257,6 @@
		x(BCH_ERR_nopromote, nopromote_in_flight) \
		x(BCH_ERR_nopromote, nopromote_no_writes) \
		x(BCH_ERR_nopromote, nopromote_enomem) \
		x(0, need_inode_lock) \
		x(0, invalid_snapshot_node) \
		x(0, option_needs_open_fs)

fs/bcachefs/extents.c

+22 −1

Original line number	Diff line number	Diff line
		@@ -929,8 +929,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
		bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
		if (p1.ptr.dev == p2.ptr.dev &&
		p1.ptr.gen == p2.ptr.gen &&

		/*
		* This checks that the two pointers point
		* to the same region on disk - adjusting
		* for the difference in where the extents
		* start, since one may have been trimmed:
		*/
		(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
		(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
		(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) &&

		/*
		* This additionally checks that the
		* extents overlap on disk, since the
		* previous check may trigger spuriously
		* when one extent is immediately partially
		* overwritten with another extent (so that
		* on disk they are adjacent) and
		* compression is in use:
		*/
		((p1.ptr.offset >= p2.ptr.offset &&
		p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) \|\|
		(p2.ptr.offset >= p1.ptr.offset &&
		p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size)))
		return true;

		return false;

fs/bcachefs/fs-io-buffered.c

+40 −109

Original line number	Diff line number	Diff line
		@@ -802,8 +802,7 @@ static noinline void folios_trunc(folios fs, struct folio *fi)
		static int __bch2_buffered_write(struct bch_inode_info *inode,
		struct address_space *mapping,
		struct iov_iter *iter,
		loff_t pos, unsigned len,
		bool inode_locked)
		loff_t pos, unsigned len)
		{
		struct bch_fs *c = inode->v.i_sb->s_fs_info;
		struct bch2_folio_reservation res;
		@@ -827,15 +826,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,

		BUG_ON(!fs.nr);

		/*
		* If we're not using the inode lock, we need to lock all the folios for
		* atomiticity of writes vs. other writes:
		*/
		if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
		ret = -BCH_ERR_need_inode_lock;
		goto out;
		}

		f = darray_first(fs);
		if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
		ret = bch2_read_single_folio(f, mapping);
		@@ -932,10 +922,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
		end = pos + copied;

		spin_lock(&inode->v.i_lock);
		if (end > inode->v.i_size) {
		BUG_ON(!inode_locked);
		if (end > inode->v.i_size)
		i_size_write(&inode->v, end);
		}
		spin_unlock(&inode->v.i_lock);

		f_pos = pos;
		@@ -979,68 +967,12 @@ static ssize_t bch2_buffered_write(struct kiocb iocb, struct iov_iter iter)
		struct file *file = iocb->ki_filp;
		struct address_space *mapping = file->f_mapping;
		struct bch_inode_info *inode = file_bch_inode(file);
		loff_t pos;
		bool inode_locked = false;
		ssize_t written = 0, written2 = 0, ret = 0;

		/*
		* We don't take the inode lock unless i_size will be changing. Folio
		* locks provide exclusion with other writes, and the pagecache add lock
		* provides exclusion with truncate and hole punching.
		*
		* There is one nasty corner case where atomicity would be broken
		* without great care: when copying data from userspace to the page
		* cache, we do that with faults disable - a page fault would recurse
		* back into the filesystem, taking filesystem locks again, and
		* deadlock; so it's done with faults disabled, and we fault in the user
		* buffer when we aren't holding locks.
		*
		* If we do part of the write, but we then race and in the userspace
		* buffer have been evicted and are no longer resident, then we have to
		* drop our folio locks to re-fault them in, breaking write atomicity.
		*
		* To fix this, we restart the write from the start, if we weren't
		* holding the inode lock.
		*
		* There is another wrinkle after that; if we restart the write from the
		* start, and then get an unrecoverable error, we _cannot_ claim to
		* userspace that we did not write data we actually did - so we must
		* track (written2) the most we ever wrote.
		*/

		if ((iocb->ki_flags & IOCB_APPEND) \|\|
		(iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
		inode_lock(&inode->v);
		inode_locked = true;
		}

		ret = generic_write_checks(iocb, iter);
		if (ret <= 0)
		goto unlock;

		ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
		if (ret) {
		if (!inode_locked) {
		inode_lock(&inode->v);
		inode_locked = true;
		ret = file_remove_privs_flags(file, 0);
		}
		if (ret)
		goto unlock;
		}

		ret = file_update_time(file);
		if (ret)
		goto unlock;

		pos = iocb->ki_pos;
		loff_t pos = iocb->ki_pos;
		ssize_t written = 0;
		int ret = 0;

		bch2_pagecache_add_get(inode);

		if (!inode_locked &&
		(iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
		goto get_inode_lock;

		do {
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned bytes = iov_iter_count(iter);
		@@ -1065,17 +997,12 @@ static ssize_t bch2_buffered_write(struct kiocb iocb, struct iov_iter iter)
		}
		}

		if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
		goto get_inode_lock;

		if (unlikely(fatal_signal_pending(current))) {
		ret = -EINTR;
		break;
		}

		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
		if (ret == -BCH_ERR_need_inode_lock)
		goto get_inode_lock;
		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
		if (unlikely(ret < 0))
		break;

		@@ -1096,46 +1023,50 @@ static ssize_t bch2_buffered_write(struct kiocb iocb, struct iov_iter iter)
		}
		pos += ret;
		written += ret;
		written2 = max(written, written2);

		if (ret != bytes && !inode_locked)
		goto get_inode_lock;
		ret = 0;

		balance_dirty_pages_ratelimited(mapping);
		} while (iov_iter_count(iter));

		if (0) {
		get_inode_lock:
		bch2_pagecache_add_put(inode);
		inode_lock(&inode->v);
		inode_locked = true;
		bch2_pagecache_add_get(inode);

		iov_iter_revert(iter, written);
		pos -= written;
		written = 0;
		ret = 0;
		return written ? written : ret;
		}
		} while (iov_iter_count(iter));
		bch2_pagecache_add_put(inode);
		unlock:
		if (inode_locked)
		inode_unlock(&inode->v);

		iocb->ki_pos += written;
		ssize_t bch2_write_iter(struct kiocb iocb, struct iov_iter from)
		{
		struct file *file = iocb->ki_filp;
		struct bch_inode_info *inode = file_bch_inode(file);
		ssize_t ret;

		ret = max(written, written2) ?: ret;
		if (ret > 0)
		ret = generic_write_sync(iocb, ret);
		return ret;
		if (iocb->ki_flags & IOCB_DIRECT) {
		ret = bch2_direct_write(iocb, from);
		goto out;
		}

		ssize_t bch2_write_iter(struct kiocb iocb, struct iov_iter iter)
		{
		ssize_t ret = iocb->ki_flags & IOCB_DIRECT
		? bch2_direct_write(iocb, iter)
		: bch2_buffered_write(iocb, iter);
		inode_lock(&inode->v);

		ret = generic_write_checks(iocb, from);
		if (ret <= 0)
		goto unlock;

		ret = file_remove_privs(file);
		if (ret)
		goto unlock;

		ret = file_update_time(file);
		if (ret)
		goto unlock;

		ret = bch2_buffered_write(iocb, from);
		if (likely(ret > 0))
		iocb->ki_pos += ret;
		unlock:
		inode_unlock(&inode->v);

		if (ret > 0)
		ret = generic_write_sync(iocb, ret);
		out:
		return bch2_err_class(ret);
		}

fs/bcachefs/sb-errors_format.h

+5 −5

Original line number	Diff line number	Diff line
		@@ -23,7 +23,7 @@ enum bch_fsck_flags {
		x(jset_past_bucket_end, 9, 0) \
		x(jset_seq_blacklisted, 10, 0) \
		x(journal_entries_missing, 11, 0) \
		x(journal_entry_replicas_not_marked, 12, 0) \
		x(journal_entry_replicas_not_marked, 12, FSCK_AUTOFIX) \
		x(journal_entry_past_jset_end, 13, 0) \
		x(journal_entry_replicas_data_mismatch, 14, 0) \
		x(journal_entry_bkey_u64s_0, 15, 0) \
		@@ -288,10 +288,10 @@ enum bch_fsck_flags {
		x(invalid_btree_id, 274, 0) \
		x(alloc_key_io_time_bad, 275, 0) \
		x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
		x(accounting_key_junk_at_end, 277, 0) \
		x(accounting_key_replicas_nr_devs_0, 278, 0) \
		x(accounting_key_replicas_nr_required_bad, 279, 0) \
		x(accounting_key_replicas_devs_unsorted, 280, 0) \
		x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
		x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
		x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
		x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \

		enum bch_sb_error_id {
		#define x(t, n, ...) BCH_FSCK_ERR_##t = n,