Commit f86f7a75 authored by Filipe Manana's avatar Filipe Manana Committed by David Sterba
Browse files

btrfs: use the flags of an extent map to identify the compression type



Currently, in struct extent_map, we use an unsigned int (32 bits) to
identify the compression type of an extent and an unsigned long (64 bits
on a 64 bits platform, 32 bits otherwise) for flags. We are only using
6 different flags, so an unsigned long is excessive and we can use flags
to identify the compression type instead of using a dedicated 32 bits
field.

We can easily have tens or hundreds of thousands (or more) of extent maps
on busy and large filesystems, specially with compression enabled or many
or large files with tons of small extents. So it's convenient to have the
extent_map structure as small as possible in order to use less memory.

So remove the compression type field from struct extent_map, use flags
to identify the compression type and shorten the flags field from an
unsigned long to a u32. This saves 8 bytes (on 64 bits platforms) and
reduces the size of the structure from 136 bytes down to 128 bytes, using
now only two cache lines, and increases the number of extent maps we can
have per 4K page from 30 to 32. By using a u32 for the flags instead of
an unsigned long, we no longer use test_bit(), set_bit() and clear_bit(),
but that level of atomicity is not needed as most flags are never cleared
once set (before adding an extent map to the tree), and the ones that can
be cleared or set after an extent map is added to the tree, are always
performed while holding the write lock on the extent map tree, while the
reader holds a lock on the tree or tests for a flag that never changes
once the extent map is in the tree (such as compression flags).

Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 27f0d9c9
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -584,7 +584,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
		goto out;
	}

	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
	ASSERT(extent_map_is_compressed(em));
	compressed_len = em->block_len;

	cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
@@ -596,7 +596,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)

	cb->len = bbio->bio.bi_iter.bi_size;
	cb->compressed_len = compressed_len;
	cb->compress_type = em->compress_type;
	cb->compress_type = extent_map_compression(em);
	cb->orig_bbio = bbio;

	free_extent_map(em);
+4 −4
Original line number Diff line number Diff line
@@ -775,7 +775,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
	 * this em, as either we don't care about the generation, or the
	 * merged extent map will be rejected anyway.
	 */
	if (em && test_bit(EXTENT_FLAG_MERGED, &em->flags) &&
	if (em && (em->flags & EXTENT_FLAG_MERGED) &&
	    newer_than && em->generation >= newer_than) {
		free_extent_map(em);
		em = NULL;
@@ -802,7 +802,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
				   const struct extent_map *em)
{
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
	if (extent_map_is_compressed(em))
		return BTRFS_MAX_COMPRESSED;
	return fs_info->max_extent_size;
}
@@ -828,7 +828,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
	/* No more em or hole */
	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
		goto out;
	if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags))
	if (next->flags & EXTENT_FLAG_PREALLOC)
		goto out;
	/*
	 * If the next extent is at its max capacity, defragging current extent
@@ -998,7 +998,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,

		/* Skip holes and preallocated extents. */
		if (em->block_start == EXTENT_MAP_HOLE ||
		    test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		    (em->flags & EXTENT_FLAG_PREALLOC))
			goto next;

		/* Skip older extent */
+6 −7
Original line number Diff line number Diff line
@@ -1032,8 +1032,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
		BUG_ON(extent_map_end(em) <= cur);
		BUG_ON(end < cur);

		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			compress_type = em->compress_type;
		compress_type = extent_map_compression(em);

		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		iosize = ALIGN(iosize, blocksize);
@@ -1042,7 +1041,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
		else
			disk_bytenr = em->block_start + extent_offset;
		block_start = em->block_start;
		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		if (em->flags & EXTENT_FLAG_PREALLOC)
			block_start = EXTENT_MAP_HOLE;

		/*
@@ -1079,7 +1078,7 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
		 * is a corner case so we prioritize correctness over
		 * non-optimal behavior (submitting 2 bios for the same extent).
		 */
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
		if (compress_type != BTRFS_COMPRESS_NONE &&
		    prev_em_start && *prev_em_start != (u64)-1 &&
		    *prev_em_start != em->start)
			force_bio_submit = true;
@@ -1358,7 +1357,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
		block_start = em->block_start;
		disk_bytenr = em->block_start + extent_offset;

		ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
		ASSERT(!extent_map_is_compressed(em));
		ASSERT(block_start != EXTENT_MAP_HOLE);
		ASSERT(block_start != EXTENT_MAP_INLINE);

@@ -2360,7 +2359,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
				write_unlock(&map->lock);
				break;
			}
			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
			if ((em->flags & EXTENT_FLAG_PINNED) ||
			    em->start != start) {
				write_unlock(&map->lock);
				free_extent_map(em);
@@ -2377,7 +2376,7 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
			 * extra reference on the em.
			 */
			if (list_empty(&em->list) ||
			    test_bit(EXTENT_FLAG_LOGGING, &em->flags))
			    (em->flags & EXTENT_FLAG_LOGGING))
				goto remove_em;
			/*
			 * If it's in the list of modified extents, remove it
+22 −29
Original line number Diff line number Diff line
@@ -50,7 +50,6 @@ struct extent_map *alloc_extent_map(void)
	if (!em)
		return NULL;
	RB_CLEAR_NODE(&em->rb_node);
	em->compress_type = BTRFS_COMPRESS_NONE;
	refcount_set(&em->refs, 1);
	INIT_LIST_HEAD(&em->list);
	return em;
@@ -189,14 +188,14 @@ static inline u64 extent_map_block_end(const struct extent_map *em)

static bool can_merge_extent_map(const struct extent_map *em)
{
	if (test_bit(EXTENT_FLAG_PINNED, &em->flags))
	if (em->flags & EXTENT_FLAG_PINNED)
		return false;

	/* Don't merge compressed extents, we need to know their actual size. */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
	if (extent_map_is_compressed(em))
		return false;

	if (test_bit(EXTENT_FLAG_LOGGING, &em->flags))
	if (em->flags & EXTENT_FLAG_LOGGING)
		return false;

	/*
@@ -258,7 +257,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
			em->mod_start = merge->mod_start;
			em->generation = max(em->generation, merge->generation);
			set_bit(EXTENT_FLAG_MERGED, &em->flags);
			em->flags |= EXTENT_FLAG_MERGED;

			rb_erase_cached(&merge->rb_node, &tree->map);
			RB_CLEAR_NODE(&merge->rb_node);
@@ -276,7 +275,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
		RB_CLEAR_NODE(&merge->rb_node);
		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
		em->generation = max(em->generation, merge->generation);
		set_bit(EXTENT_FLAG_MERGED, &em->flags);
		em->flags |= EXTENT_FLAG_MERGED;
		free_extent_map(merge);
	}
}
@@ -319,13 +318,13 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
			   em->start, start, len, gen);

	em->generation = gen;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
	em->flags &= ~EXTENT_FLAG_PINNED;
	em->mod_start = em->start;
	em->mod_len = em->len;

	if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
	if (em->flags & EXTENT_FLAG_FILLING) {
		prealloc = true;
		clear_bit(EXTENT_FLAG_FILLING, &em->flags);
		em->flags &= ~EXTENT_FLAG_FILLING;
	}

	try_merge_map(tree, em);
@@ -346,7 +345,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
	em->flags &= ~EXTENT_FLAG_LOGGING;
	if (extent_map_in_tree(em))
		try_merge_map(tree, em);
}
@@ -471,9 +470,9 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	WARN_ON(em->flags & EXTENT_FLAG_PINNED);
	rb_erase_cached(&em->rb_node, &tree->map);
	if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
	if (!(em->flags & EXTENT_FLAG_LOGGING))
		list_del_init(&em->list);
	RB_CLEAR_NODE(&em->rb_node);
}
@@ -485,9 +484,9 @@ static void replace_extent_mapping(struct extent_map_tree *tree,
{
	lockdep_assert_held_write(&tree->lock);

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
	WARN_ON(cur->flags & EXTENT_FLAG_PINNED);
	ASSERT(extent_map_in_tree(cur));
	if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
	if (!(cur->flags & EXTENT_FLAG_LOGGING))
		list_del_init(&cur->list);
	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
	RB_CLEAR_NODE(&cur->rb_node);
@@ -550,7 +549,7 @@ static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
	em->start = start;
	em->len = end - start;
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
	    !extent_map_is_compressed(em)) {
		em->block_start += start_diff;
		em->block_len = em->len;
	}
@@ -653,8 +652,7 @@ static void drop_all_extent_maps_fast(struct extent_map_tree *tree)

		node = rb_first_cached(&tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
		remove_extent_mapping(tree, em);
		free_extent_map(em);
		cond_resched_rwlock_write(&tree->lock);
@@ -730,19 +728,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
			}
		}

		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
		if (skip_pinned && (em->flags & EXTENT_FLAG_PINNED)) {
			start = em_end;
			goto next;
		}

		flags = em->flags;
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		/*
		 * In case we split the extent map, we want to preserve the
		 * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
		 * it on the new extent maps.
		 */
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
		modified = !list_empty(&em->list);

		/*
@@ -753,7 +750,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
			goto remove_em;

		gen = em->generation;
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		compressed = extent_map_is_compressed(em);

		if (em->start < start) {
			if (!split) {
@@ -786,7 +783,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,

			split->generation = gen;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
@@ -803,7 +799,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
			split->len = em_end - end;
			split->block_start = em->block_start;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
@@ -969,14 +964,14 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
	}

	ASSERT(em->len == len);
	ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
	ASSERT(!extent_map_is_compressed(em));
	ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
	ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
	ASSERT(em->flags & EXTENT_FLAG_PINNED);
	ASSERT(!(em->flags & EXTENT_FLAG_LOGGING));
	ASSERT(!list_empty(&em->list));

	flags = em->flags;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
	em->flags &= ~EXTENT_FLAG_PINNED;

	/* First, replace the em with a new extent_map starting from * em->start */
	split_pre->start = em->start;
@@ -987,7 +982,6 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
	split_pre->orig_block_len = split_pre->block_len;
	split_pre->ram_bytes = split_pre->len;
	split_pre->flags = flags;
	split_pre->compress_type = em->compress_type;
	split_pre->generation = em->generation;

	replace_extent_mapping(em_tree, em, split_pre, 1);
@@ -1006,7 +1000,6 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
	split_mid->orig_block_len = split_mid->block_len;
	split_mid->ram_bytes = split_mid->len;
	split_mid->flags = flags;
	split_mid->compress_type = em->compress_type;
	split_mid->generation = em->generation;
	add_extent_mapping(em_tree, split_mid, 1);

+50 −8
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@

#include <linux/rbtree.h>
#include <linux/refcount.h>
#include "compression.h"

#define EXTENT_MAP_LAST_BYTE ((u64)-4)
#define EXTENT_MAP_HOLE ((u64)-3)
@@ -13,18 +14,24 @@
/* bits for the extent_map::flags field */
enum {
	/* this entry not yet on disk, don't free it */
	EXTENT_FLAG_PINNED,
	EXTENT_FLAG_COMPRESSED,
	ENUM_BIT(EXTENT_FLAG_PINNED),
	ENUM_BIT(EXTENT_FLAG_COMPRESS_ZLIB),
	ENUM_BIT(EXTENT_FLAG_COMPRESS_LZO),
	ENUM_BIT(EXTENT_FLAG_COMPRESS_ZSTD),
	/* pre-allocated extent */
	EXTENT_FLAG_PREALLOC,
	ENUM_BIT(EXTENT_FLAG_PREALLOC),
	/* Logging this extent */
	EXTENT_FLAG_LOGGING,
	ENUM_BIT(EXTENT_FLAG_LOGGING),
	/* Filling in a preallocated extent */
	EXTENT_FLAG_FILLING,
	ENUM_BIT(EXTENT_FLAG_FILLING),
	/* This em is merged from two or more physically adjacent ems */
	EXTENT_FLAG_MERGED,
	ENUM_BIT(EXTENT_FLAG_MERGED),
};

/*
 * Keep this structure as compact as possible, as we can have really large
 * amounts of allocated extent maps at any time.
 */
struct extent_map {
	struct rb_node rb_node;

@@ -45,9 +52,8 @@ struct extent_map {
	 * For non-merged extents, it's from btrfs_file_extent_item::generation.
	 */
	u64 generation;
	unsigned long flags;
	u32 flags;
	refcount_t refs;
	unsigned int compress_type;
	struct list_head list;
};

@@ -59,6 +65,42 @@ struct extent_map_tree {

struct btrfs_inode;

static inline void extent_map_set_compression(struct extent_map *em,
					      enum btrfs_compression_type type)
{
	if (type == BTRFS_COMPRESS_ZLIB)
		em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
	else if (type == BTRFS_COMPRESS_LZO)
		em->flags |= EXTENT_FLAG_COMPRESS_LZO;
	else if (type == BTRFS_COMPRESS_ZSTD)
		em->flags |= EXTENT_FLAG_COMPRESS_ZSTD;
}

static inline enum btrfs_compression_type extent_map_compression(const struct extent_map *em)
{
	if (em->flags & EXTENT_FLAG_COMPRESS_ZLIB)
		return BTRFS_COMPRESS_ZLIB;

	if (em->flags & EXTENT_FLAG_COMPRESS_LZO)
		return BTRFS_COMPRESS_LZO;

	if (em->flags & EXTENT_FLAG_COMPRESS_ZSTD)
		return BTRFS_COMPRESS_ZSTD;

	return BTRFS_COMPRESS_NONE;
}

/*
 * More efficient way to determine if extent is compressed, instead of using
 * 'extent_map_compression() != BTRFS_COMPRESS_NONE'.
 */
static inline bool extent_map_is_compressed(const struct extent_map *em)
{
	return (em->flags & (EXTENT_FLAG_COMPRESS_ZLIB |
			     EXTENT_FLAG_COMPRESS_LZO |
			     EXTENT_FLAG_COMPRESS_ZSTD)) != 0;
}

static inline int extent_map_in_tree(const struct extent_map *em)
{
	return !RB_EMPTY_NODE(&em->rb_node);
Loading