Commit c1e82275 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Lots of hotfixes:

   - transaction restart injection has been shaking out a few things

   - fix a data corruption in the buffered write path on -ENOSPC, found
     by xfstests generic/299

   - Some small show_options fixes

   - Repair mismatches in inode hash type, seed: different snapshot
     versions of an inode must have the same hash/type seed, used for
     directory entries and xattrs. We were checking the hash seed, but
     not the type, and a user contributed a filesystem where the hash
     type on one inode had somehow been flipped; these fixes allow his
     filesystem to repair.

     Additionally, the hash type flip made some directory entries
     invisible, which were then recreated by userspace; so the hash
     check code now checks for duplicate non dangling dirents, and
     renames one of them if necessary.

   - Don't use wait_event_interruptible() in recovery: this fixes some
     filesystems failing to mount with -ERESTARTSYS

   - Workaround for kvmalloc not supporting > INT_MAX allocations,
     causing an -ENOMEM when allocating the sorted array of journal
     keys: this allows a 75 TB filesystem to mount

   - Make sure bch_inode_unpacked.bi_snapshot is set in the old inode
     compat path: this alllows Marcin's filesystem (in use since before
     6.7) to repair and mount"

* tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs: (26 commits)
  bcachefs: Set bch_inode_unpacked.bi_snapshot in old inode path
  bcachefs: Mark more errors as AUTOFIX
  bcachefs: Workaround for kvmalloc() not supporting > INT_MAX allocations
  bcachefs: Don't use wait_event_interruptible() in recovery
  bcachefs: Fix __bch2_fsck_err() warning
  bcachefs: fsck: Improve hash_check_key()
  bcachefs: bch2_hash_set_or_get_in_snapshot()
  bcachefs: Repair mismatches in inode hash seed, type
  bcachefs: Add hash seed, type to inode_to_text()
  bcachefs: INODE_STR_HASH() for bch_inode_unpacked
  bcachefs: Run in-kernel offline fsck without ratelimit errors
  bcachefs: skip mount option handle for empty string.
  bcachefs: fix incorrect show_options results
  bcachefs: Fix data corruption on -ENOSPC in buffered write path
  bcachefs: bch2_folio_reservation_get_partial() is now better behaved
  bcachefs: fix disk reservation accounting in bch2_folio_reservation_get()
  bcachefS: ec: fix data type on stripe deletion
  bcachefs: Don't use commit_do() unnecessarily
  bcachefs: handle restarts in bch2_bucket_io_time_reset()
  bcachefs: fix restart handling in __bch2_resume_logged_op_finsert()
  ...
parents f009e946 a069f014
Loading
Loading
Loading
Loading
+21 −16
Original line number Diff line number Diff line
@@ -1977,7 +1977,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
					     ca->mi.bucket_size,
					     GFP_KERNEL);

		int ret = bch2_trans_do(c, NULL, NULL,
		int ret = bch2_trans_commit_do(c, NULL, NULL,
			BCH_WATERMARK_btree|
			BCH_TRANS_COMMIT_no_enospc,
			bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
@@ -2137,14 +2137,15 @@ static void bch2_do_invalidates_work(struct work_struct *work)

		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
		ret = bkey_err(k);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
			goto restart_err;
		if (!k.k)
			break;

		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
restart_err:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

@@ -2350,24 +2351,19 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)

/* Bucket IO clocks: */

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
				size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret = 0;

	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a =
		bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	int ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = bch2_current_io_time(c, rw);
	u64 now = bch2_current_io_time(c, rw);
	if (a->v.io_time[rw] == now)
		goto out;

@@ -2380,6 +2376,15 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
	return ret;
}

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	return nested_lockrestart_do(trans, __bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
}

/* Startup/shutdown (ro/rw): */

void bch2_recalc_capacity(struct bch_fs *c)
+1 −1
Original line number Diff line number Diff line
@@ -684,7 +684,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
	struct bch_dev_usage usage;
	struct open_bucket *ob;

	bch2_trans_do(c, NULL, NULL, 0,
	bch2_trans_do(c,
		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
							data_type, cl, false, &usage)));
	return ob;
+11 −1
Original line number Diff line number Diff line
@@ -820,12 +820,22 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
	 * fix that here:
	 */
	alloc_data_type_set(&gc, gc.data_type);

	if (gc.data_type != old_gc.data_type ||
	    gc.dirty_sectors != old_gc.dirty_sectors) {
		ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
		if (ret)
			return ret;

		/*
		 * Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
		 * safe w.r.t. transaction restarts, so fixup the gc_bucket so
		 * we don't run it twice:
		 */
		percpu_down_read(&c->mark_lock);
		struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
		gc_m->data_type = gc.data_type;
		gc_m->dirty_sectors = gc.dirty_sectors;
		percpu_up_read(&c->mark_lock);
	}

	if (fsck_err_on(new.data_type != gc.data_type,
+1 −1
Original line number Diff line number Diff line
@@ -1871,7 +1871,7 @@ static void btree_node_write_work(struct work_struct *work)

		}
	} else {
		ret = bch2_trans_do(c, NULL, NULL, 0,
		ret = bch2_trans_do(c,
			bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
					BCH_WATERMARK_interior_updates|
					BCH_TRANS_COMMIT_journal_reclaim|
+2 −0
Original line number Diff line number Diff line
@@ -912,6 +912,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
	_ret;								\
})

#define bch2_trans_do(_c, _do)	bch2_trans_run(_c, lockrestart_do(trans, _do))

struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);

Loading