Commit 006ccc30 authored by Kent Overstreet
Browse files

bcachefs: Kill journal pre-reservations



This deletes the complicated and somewhat expensive journal
pre-reservation machinery in favor of just using journal watermarks:
when the journal is more than half full, we run journal reclaim more
aggressively, and when the journal is more than 3/4 full we only allow
journal reclaim to get new journal reservations.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 701ff57e
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
	}

	bch2_journal_preres_put(&c->journal, &trans->journal_preres);

	kfree(trans->extra_journal_entries.data);

	if (trans->fs_usage_deltas) {
+0 −14
Original line number Diff line number Diff line
@@ -672,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
		goto out;

	bch2_journal_pin_drop(j, &ck->journal);
	bch2_journal_preres_put(j, &ck->res);

	BUG_ON(!btree_node_locked(c_iter.path, 0));

@@ -770,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,

	BUG_ON(insert->k.u64s > ck->u64s);

	if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
		int difference;

		BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);

		difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
		if (difference > 0) {
			trans->journal_preres.u64s	-= difference;
			ck->res.u64s			+= difference;
		}
	}

	bkey_copy(ck->k, insert);
	ck->valid = true;

@@ -1006,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
		cond_resched();

		bch2_journal_pin_drop(&c->journal, &ck->journal);
		bch2_journal_preres_put(&c->journal, &ck->res);

		list_del(&ck->list);
		kfree(ck->k);
+2 −34
Original line number Diff line number Diff line
@@ -323,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
		bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}

/*
 * Out-of-line (noinline) helper that requests the transaction's journal
 * pre-reservation.
 *
 * Asks bch2_journal_preres_get() for trans->journal_preres_u64s u64s on
 * behalf of trans->journal_preres, passing only the watermark bits of @flags
 * (flags & BCH_WATERMARK_MASK); JOURNAL_RES_GET_NONBLOCK is not added here.
 * The call is wrapped in drop_locks_do() -- presumably so the transaction's
 * btree locks are not held while the request waits; TODO confirm against the
 * drop_locks_do() definition.
 *
 * Returns the value of the drop_locks_do() expression.
 *
 * NOTE(review): @trace_ip is accepted but not used in this function body.
 */
static noinline int
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
				   unsigned long trace_ip)
{
	return drop_locks_do(trans,
		bch2_journal_preres_get(&trans->c->journal,
			&trans->journal_preres,
			trans->journal_preres_u64s,
			(flags & BCH_WATERMARK_MASK)));
}

static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
						      unsigned flags)
{
@@ -882,14 +871,6 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
		}
	}

	ret = bch2_journal_preres_get(&c->journal,
			&trans->journal_preres, trans->journal_preres_u64s,
			(flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
	if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
		ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
	if (unlikely(ret))
		return ret;

	ret = bch2_trans_lock_write(trans);
	if (unlikely(ret))
		return ret;
@@ -1052,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i = NULL;
	struct btree_write_buffered_key *wb;
	unsigned u64s;
	int ret = 0;

	if (!trans->nr_updates &&
@@ -1112,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)

	EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));

	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));

	trans->journal_u64s		= trans->extra_journal_entries.nr;
	trans->journal_preres_u64s	= 0;

	trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);

	if (trans->journal_transaction_names)
		trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);

@@ -1134,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
		if (i->key_cache_already_flushed)
			continue;

		/* we're going to journal the key being updated: */
		u64s = jset_u64s(i->k->k.u64s);
		if (i->cached &&
		    likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
			trans->journal_preres_u64s += u64s;

		if (i->flags & BTREE_UPDATE_NOJOURNAL)
			continue;

		trans->journal_u64s += u64s;
		/* we're going to journal the key being updated: */
		trans->journal_u64s += jset_u64s(i->k->k.u64s);

		/* and we're also going to log the overwrite: */
		if (trans->journal_transaction_names)
@@ -1175,8 +1145,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)

	trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
	bch2_journal_preres_put(&c->journal, &trans->journal_preres);

	if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
		bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
+0 −3
Original line number Diff line number Diff line
@@ -327,7 +327,6 @@ struct bkey_cached {
	struct rhash_head	hash;
	struct list_head	list;

	struct journal_preres	res;
	struct journal_entry_pin journal;
	u64			seq;

@@ -441,11 +440,9 @@ struct btree_trans {
	struct journal_entry_pin *journal_pin;

	struct journal_res	journal_res;
	struct journal_preres	journal_preres;
	u64			*journal_seq;
	struct disk_reservation *disk_res;
	unsigned		journal_u64s;
	unsigned		journal_preres_u64s;
	struct replicas_delta_list *fs_usage_deltas;
};

+0 −30
Original line number Diff line number Diff line
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
		up_read(&c->gc_lock);
	as->took_gc_lock = false;

	bch2_journal_preres_put(&c->journal, &as->journal_preres);

	bch2_journal_pin_drop(&c->journal, &as->journal);
	bch2_journal_pin_flush(&c->journal, &as->journal);
	bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ static void btree_update_nodes_written(struct btree_update *as)

	bch2_journal_pin_drop(&c->journal, &as->journal);

	bch2_journal_preres_put(&c->journal, &as->journal_preres);

	mutex_lock(&c->btree_interior_update_lock);
	for (i = 0; i < as->nr_new_nodes; i++) {
		b = as->new_nodes[i];
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
	unsigned nr_nodes[2] = { 0, 0 };
	unsigned update_level = level;
	enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
	unsigned journal_flags = 0;
	int ret = 0;
	u32 restart_count = trans->restart_count;

@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
	flags &= ~BCH_WATERMARK_MASK;
	flags |= watermark;

	if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
		journal_flags |= JOURNAL_RES_GET_NONBLOCK;
	journal_flags |= watermark;

	while (1) {
		nr_nodes[!!update_level] += 1 + split;
		update_level++;
@@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
	if (ret)
		goto err;

	ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
				      BTREE_UPDATE_JOURNAL_RES,
				      journal_flags|JOURNAL_RES_GET_NONBLOCK);
	if (ret) {
		if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
			ret = -BCH_ERR_journal_reclaim_would_deadlock;
			goto err;
		}

		ret = drop_locks_do(trans,
			bch2_journal_preres_get(&c->journal, &as->journal_preres,
					      BTREE_UPDATE_JOURNAL_RES,
					      journal_flags));
		if (ret == -BCH_ERR_journal_preres_get_blocked) {
			trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
			ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
		}
		if (ret)
			goto err;
	}

	ret = bch2_disk_reservation_get(c, &as->disk_res,
			(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
			c->opts.metadata_replicas,
Loading