Commit bceacfa9 authored by Daniel Hill's avatar Daniel Hill Committed by Kent Overstreet
Browse files

bcachefs: add counters for failed shrinker reclaim



This adds distinct counters for every reason the btree node shrinker can
fail to free an object - if our shrinker isn't making progress, this
will tell us why.

Signed-off-by: default avatarDaniel Hill <daniel@gluo.nz>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 692aa7a5
Loading
Loading
Loading
Loading
+68 −20
Original line number Diff line number Diff line
@@ -16,6 +16,12 @@
#include <linux/prefetch.h>
#include <linux/sched/mm.h>

#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
do {						 \
	if (shrinker_counter)			 \
		bc->not_freed_##counter++;	 \
} while (0)

const char * const bch2_btree_node_flags[] = {
#define x(f)	#f,
	BTREE_FLAGS()
@@ -238,7 +244,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
{
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;
@@ -260,38 +266,64 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
	if (b->flags & ((1U << BTREE_NODE_dirty)|
			(1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush)
		if (!flush) {
			if (btree_node_dirty(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			else if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			return -BCH_ERR_ENOMEM_btree_node_reclaim;
		}

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	if (!six_trylock_intent(&b->c.lock))
	if (!six_trylock_intent(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	if (!six_trylock_write(&b->c.lock))
	if (!six_trylock_write(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
		goto out_unlock_intent;
	}

	/* recheck under lock */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush)
		if (!flush) {
			if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			goto out_unlock;
		}
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	if (btree_node_noevict(b) ||
	    btree_node_write_blocked(b) ||
	    btree_node_will_make_reachable(b))
	if (btree_node_noevict(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
		goto out_unlock;
	}
	if (btree_node_write_blocked(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
		goto out_unlock;
	}
	if (btree_node_will_make_reachable(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
		goto out_unlock;
	}

	if (btree_node_dirty(b)) {
		if (!flush)
		if (!flush) {
			BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			goto out_unlock;
		}
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
@@ -321,14 +353,14 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
	goto out;
}

static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
{
	return __btree_node_reclaim(c, b, false);
	return __btree_node_reclaim(c, b, false, shrinker_counter);
}

static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, true);
	return __btree_node_reclaim(c, b, true, false);
}

static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -376,11 +408,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
		if (touched >= nr)
			goto out;

		if (!btree_node_reclaim(c, b)) {
		if (!btree_node_reclaim(c, b, true)) {
			btree_node_data_free(c, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			freed++;
			bc->freed++;
		}
	}
restart:
@@ -389,9 +422,11 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,

		if (btree_node_accessed(b)) {
			clear_btree_node_accessed(b);
		} else if (!btree_node_reclaim(c, b)) {
			bc->not_freed_access_bit++;
		} else if (!btree_node_reclaim(c, b, true)) {
			freed++;
			btree_node_data_free(c, b);
			bc->freed++;

			bch2_btree_node_hash_remove(bc, b);
			six_unlock_write(&b->c.lock);
@@ -599,7 +634,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
	struct btree *b;

	list_for_each_entry_reverse(b, &bc->live, list)
		if (!btree_node_reclaim(c, b))
		if (!btree_node_reclaim(c, b, false))
			return b;

	while (1) {
@@ -635,7 +670,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list)
		if (!btree_node_reclaim(c, b)) {
		if (!btree_node_reclaim(c, b, false)) {
			list_del_init(&b->list);
			goto got_node;
		}
@@ -661,7 +696,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list)
		if (!btree_node_reclaim(c, b2)) {
		if (!btree_node_reclaim(c, b2, false)) {
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
@@ -1280,12 +1315,12 @@ static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
	prt_printf(out, " (%u)\n", nr);
}

void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
{
	const struct btree_cache *bc = &c->btree_cache;
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);

	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 24);
		printbuf_tabstop_push(out, 32);

	prt_btree_cache_line(out, c, "total:",		bc->used);
	prt_btree_cache_line(out, c, "nr dirty:",	atomic_read(&bc->dirty));
@@ -1294,4 +1329,17 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)

	for (unsigned i = 0; i < ARRAY_SIZE(bc->used_by_btree); i++)
		prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->used_by_btree[i]);

	prt_newline(out);
	prt_printf(out, "freed:\t%u\n", bc->freed);
	prt_printf(out, "not freed:\n");
	prt_printf(out, "  dirty\t%u\n", bc->not_freed_dirty);
	prt_printf(out, "  write in flight\t%u\n", bc->not_freed_write_in_flight);
	prt_printf(out, "  read in flight\t%u\n", bc->not_freed_read_in_flight);
	prt_printf(out, "  lock intent failed\t%u\n", bc->not_freed_lock_intent);
	prt_printf(out, "  lock write failed\t%u\n", bc->not_freed_lock_write);
	prt_printf(out, "  access bit\t%u\n", bc->not_freed_access_bit);
	prt_printf(out, "  no evict failed\t%u\n", bc->not_freed_noevict);
	prt_printf(out, "  write blocked\t%u\n", bc->not_freed_write_blocked);
	prt_printf(out, "  will make reachable\t%u\n", bc->not_freed_will_make_reachable);
}
+1 −1
Original line number Diff line number Diff line
@@ -134,6 +134,6 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
const char *bch2_btree_id_str(enum btree_id);
void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);

#endif /* _BCACHEFS_BTREE_CACHE_H */
+10 −0
Original line number Diff line number Diff line
@@ -163,6 +163,16 @@ struct btree_cache {
	/* Number of elements in live + freeable lists */
	unsigned		used;
	unsigned		reserve;
	unsigned		freed;
	unsigned		not_freed_lock_intent;
	unsigned		not_freed_lock_write;
	unsigned		not_freed_dirty;
	unsigned		not_freed_read_in_flight;
	unsigned		not_freed_write_in_flight;
	unsigned		not_freed_noevict;
	unsigned		not_freed_write_blocked;
	unsigned		not_freed_will_make_reachable;
	unsigned		not_freed_access_bit;
	atomic_t		dirty;
	struct shrinker		*shrink;

+1 −1
Original line number Diff line number Diff line
@@ -383,7 +383,7 @@ SHOW(bch2_fs)
		bch2_journal_debug_to_text(out, &c->journal);

	if (attr == &sysfs_btree_cache)
		bch2_btree_cache_to_text(out, c);
		bch2_btree_cache_to_text(out, &c->btree_cache);

	if (attr == &sysfs_btree_key_cache)
		bch2_btree_key_cache_to_text(out, &c->btree_key_cache);