Commit 53ea167b authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "Various cleanups and bug fixes in ext4's extent status tree,
  journalling, and block allocator subsystems.

  Also improve performance for parallel DIO overwrites"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (55 commits)
  ext4: avoid updating the superblock on a r/o mount if not needed
  jbd2: skip reading super block if it has been verified
  ext4: fix to check return value of freeze_bdev() in ext4_shutdown()
  ext4: refactoring to use the unified helper ext4_quotas_off()
  ext4: turn quotas off if mount failed after enabling quotas
  ext4: update doc about journal superblock description
  ext4: add journal cycled recording support
  jbd2: continue to record log between each mount
  jbd2: remove j_format_version
  jbd2: factor out journal initialization from journal_get_superblock()
  jbd2: switch to check format version in superblock directly
  jbd2: remove unused feature macros
  ext4: ext4_put_super: Remove redundant checking for 'sbi->s_journal_bdev'
  ext4: Fix reusing stale buffer heads from last failed mounting
  ext4: allow concurrent unaligned dio overwrites
  ext4: clean up mballoc criteria comments
  ext4: make ext4_zeroout_es() return void
  ext4: make ext4_es_insert_extent() return void
  ext4: make ext4_es_insert_delayed_block() return void
  ext4: make ext4_es_remove_extent() return void
  ...
parents b9d02c22 2ef6c32a
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -260,8 +260,13 @@ which is 1024 bytes long:
     - s_num_fc_blocks
     - Number of fast commit blocks in the journal.
   * - 0x58
     - __be32
     - s_head
     - Block number of the head (first unused block) of the journal, only
       up-to-date when the journal is empty.
   * - 0x5C
     - __u32
     - s_padding[42]
     - s_padding[40]
     -
   * - 0xFC
     - __be32
+68 −21
Original line number Diff line number Diff line
@@ -127,6 +127,58 @@ enum SHIFT_DIRECTION {
	SHIFT_RIGHT,
};

/*
 * For each criterion, mballoc has a slightly different way of finding
 * the required blocks and, usually, the higher the criterion the slower
 * the allocation.  We start at the lower criteria and keep falling back
 * to higher ones if we are not able to find any blocks.  Lower (earlier)
 * criteria are faster.
 */
enum criteria {
	/*
	 * Used when number of blocks needed is a power of 2. This
	 * doesn't trigger any disk IO except prefetch and is the
	 * fastest criterion.
	 */
	CR_POWER2_ALIGNED,

	/*
	 * Tries to lookup in-memory data structures to find the most
	 * suitable group that satisfies goal request. No disk IO
	 * except block prefetch.
	 */
	CR_GOAL_LEN_FAST,

        /*
	 * Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal
	 * length to the best available length for faster allocation.
	 */
	CR_BEST_AVAIL_LEN,

	/*
	 * Reads each block group sequentially, performing disk IO if
	 * necessary, to find a suitable block group. Tries to
	 * allocate goal length but might trim the request if nothing
	 * is found after enough tries.
	 */
	CR_GOAL_LEN_SLOW,

	/*
	 * Finds the first free set of blocks and allocates
	 * those. This is only used in rare cases when
	 * CR_GOAL_LEN_SLOW also fails to allocate anything.
	 */
	CR_ANY_FREE,

	/*
	 * Number of criteria defined.  Must stay last: it is used to
	 * size the per-criterion statistics arrays in struct ext4_sb_info.
	 */
	EXT4_MB_NUM_CRS
};

/* criteria below which we use fast block scanning and avoid unnecessary IO */
#define CR_FAST CR_GOAL_LEN_SLOW

/*
 * Flags used in mballoc's allocation_context flags field.
 *
@@ -165,9 +217,12 @@ enum SHIFT_DIRECTION {
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK		0x4000
/* Large fragment size list lookup succeeded at least once for CR_POWER2_ALIGNED */
#define EXT4_MB_CR0_OPTIMIZED		0x8000
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED		0x8000
/* Avg fragment size rb tree lookup succeeded at least once for CR_GOAL_LEN_FAST */
#define EXT4_MB_CR1_OPTIMIZED		0x00010000
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED		0x00010000
/* Avg fragment size rb tree lookup succeeded at least once for CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED		0x00020000

struct ext4_allocation_request {
	/* target inode for block we're allocating */
	struct inode *inode;
@@ -1532,21 +1587,25 @@ struct ext4_sb_info {
	unsigned long s_mb_last_start;
	unsigned int s_mb_prefetch;
	unsigned int s_mb_prefetch_limit;
	unsigned int s_mb_best_avail_max_trim_order;

	/* stats for buddy allocator */
	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
	atomic_t s_bal_success;	/* we found long enough chunks */
	atomic_t s_bal_allocated;	/* in blocks */
	atomic_t s_bal_ex_scanned;	/* total extents scanned */
	atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS];	/* total extents scanned */
	atomic_t s_bal_groups_scanned;	/* number of groups scanned */
	atomic_t s_bal_goals;	/* goal hits */
	atomic_t s_bal_len_goals;	/* len goal hits */
	atomic_t s_bal_breaks;	/* too long searches */
	atomic_t s_bal_2orders;	/* 2^order hits */
	atomic_t s_bal_cr0_bad_suggestions;
	atomic_t s_bal_cr1_bad_suggestions;
	atomic64_t s_bal_cX_groups_considered[4];
	atomic64_t s_bal_cX_hits[4];
	atomic64_t s_bal_cX_failed[4];		/* cX loop didn't find blocks */
	atomic_t s_bal_p2_aligned_bad_suggestions;
	atomic_t s_bal_goal_fast_bad_suggestions;
	atomic_t s_bal_best_avail_bad_suggestions;
	atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS];		/* cX loop didn't find blocks */
	atomic_t s_mb_buddies_generated;	/* number of buddies generated */
	atomic64_t s_mb_generation_time;
	atomic_t s_mb_lost_chunks;
@@ -2632,10 +2691,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,
extern ext4_group_t ext4_get_group_number(struct super_block *sb,
					  ext4_fsblk_t block);

extern unsigned int ext4_block_group(struct super_block *sb,
			ext4_fsblk_t blocknr);
extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
			ext4_fsblk_t blocknr);
extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
			ext4_group_t group);
@@ -2841,8 +2896,6 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
@@ -3481,14 +3534,8 @@ extern int ext4_try_to_write_inline_data(struct address_space *mapping,
					 struct inode *inode,
					 loff_t pos, unsigned len,
					 struct page **pagep);
extern int ext4_write_inline_data_end(struct inode *inode,
				      loff_t pos, unsigned len,
				      unsigned copied,
				      struct page *page);
extern struct buffer_head *
ext4_journalled_write_inline_data(struct inode *inode,
				  unsigned len,
				  struct page *page);
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct folio *folio);
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
					   struct inode *inode,
					   loff_t pos, unsigned len,
+13 −36
Original line number Diff line number Diff line
@@ -3123,7 +3123,7 @@ void ext4_ext_release(struct super_block *sb)
#endif
}

static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
{
	ext4_lblk_t  ee_block;
	ext4_fsblk_t ee_pblock;
@@ -3134,9 +3134,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
	ee_pblock = ext4_ext_pblock(ex);

	if (ee_len == 0)
		return 0;
		return;

	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
	ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
			      EXTENT_STATUS_WRITTEN);
}

@@ -3287,7 +3287,7 @@ static int ext4_split_extent_at(handle_t *handle,
			err = ext4_ext_dirty(handle, inode, path + path->p_depth);
			if (!err)
				/* update extent status tree */
				err = ext4_zeroout_es(inode, &zero_ex);
				ext4_zeroout_es(inode, &zero_ex);
			/* If we failed at this point, we don't know in which
			 * state the extent tree exactly is so don't try to fix
			 * length of the original extent as it may do even more
@@ -3640,9 +3640,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
out:
	/* If we have gotten a failure, don't zero out status tree */
	if (!err) {
		err = ext4_zeroout_es(inode, &zero_ex1);
		if (!err)
			err = ext4_zeroout_es(inode, &zero_ex2);
		ext4_zeroout_es(inode, &zero_ex1);
		ext4_zeroout_es(inode, &zero_ex2);
	}
	return err ? err : allocated;
}
@@ -4403,15 +4402,8 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)

	last_block = (inode->i_size + sb->s_blocksize - 1)
			>> EXT4_BLOCK_SIZE_BITS(sb);
retry:
	err = ext4_es_remove_extent(inode, last_block,
				    EXT_MAX_BLOCKS - last_block);
	if (err == -ENOMEM) {
		memalloc_retry_wait(GFP_ATOMIC);
		goto retry;
	}
	if (err)
		return err;
	ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);

retry_remove_space:
	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
	if (err == -ENOMEM) {
@@ -5363,13 +5355,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode, 0);

	ret = ext4_es_remove_extent(inode, punch_start,
				    EXT_MAX_BLOCKS - punch_start);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}
	ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);

	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
	if (ret) {
@@ -5547,12 +5533,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
		ext4_free_ext_path(path);
	}

	ret = ext4_es_remove_extent(inode, offset_lblk,
			EXT_MAX_BLOCKS - offset_lblk);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}
	ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);

	/*
	 * if offset_lblk lies in a hole which is at start of file, use
@@ -5610,12 +5591,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
	BUG_ON(!inode_is_locked(inode1));
	BUG_ON(!inode_is_locked(inode2));

	*erp = ext4_es_remove_extent(inode1, lblk1, count);
	if (unlikely(*erp))
		return 0;
	*erp = ext4_es_remove_extent(inode2, lblk2, count);
	if (unlikely(*erp))
		return 0;
	ext4_es_remove_extent(inode1, lblk1, count);
	ext4_es_remove_extent(inode2, lblk2, count);

	while (count) {
		struct ext4_extent *ex1, *ex2, tmp_ex;
+129 −78
Original line number Diff line number Diff line
@@ -144,9 +144,11 @@
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
			      struct extent_status *prealloc);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end, int *reserved);
			      ext4_lblk_t end, int *reserved,
			      struct extent_status *prealloc);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
@@ -446,22 +448,36 @@ static void ext4_es_list_del(struct inode *inode)
	spin_unlock(&sbi->s_es_lock);
}

static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
		     ext4_fsblk_t pblk)
/*
 * Tells whether an extent_status entry is one for which memory allocation
 * is not allowed to fail and which may not be reclaimed until its status
 * changes.
 *
 * @es - extent status entry to test
 *
 * Returns true if the entry must be kept, false if it may be dropped.
 */
static inline bool ext4_es_must_keep(struct extent_status *es)
{
	/*
	 * Delayed extents have to stay: fiemap, bigalloc, and
	 * seek_data/hole need to use them.
	 */
	return ext4_es_is_delayed(es) ? true : false;
}

/*
 * Allocate a raw extent_status object from ext4_es_cachep.
 *
 * @nofail - when true, allocate a zero-filled object with
 *           GFP_KERNEL | __GFP_NOFAIL; when false, do a plain
 *           GFP_ATOMIC allocation whose result may be NULL and whose
 *           contents are not zeroed.
 *
 * The zero fill on the nofail path is what lets callers in this file
 * detect an unused pre-allocated entry by checking for es_len == 0.
 * The object is not initialized or accounted here.
 */
static inline struct extent_status *__es_alloc_extent(bool nofail)
{
	if (nofail)
		return kmem_cache_zalloc(ext4_es_cachep,
					 GFP_KERNEL | __GFP_NOFAIL);

	return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
}

static void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
		ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
{
	struct extent_status *es;
	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
	if (es == NULL)
		return NULL;
	es->es_lblk = lblk;
	es->es_len = len;
	es->es_pblk = pblk;

	/*
	 * We don't count delayed extent because we never try to reclaim them
	 */
	if (!ext4_es_is_delayed(es)) {
	/* We never try to reclaim a must-keep extent, so we don't count it. */
	if (!ext4_es_must_keep(es)) {
		if (!EXT4_I(inode)->i_es_shk_nr++)
			ext4_es_list_add(inode);
		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
@@ -470,8 +486,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,

	EXT4_I(inode)->i_es_all_nr++;
	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
}

	return es;
/*
 * Hand an extent_status object back to ext4_es_cachep.
 *
 * Only releases the memory; no per-inode or per-superblock counters are
 * touched here.  ext4_es_free_extent() does that accounting before
 * calling this helper, while callers holding an unused pre-allocated
 * entry call it directly.
 */
static inline void __es_free_extent(struct extent_status *es)
{
	kmem_cache_free(ext4_es_cachep, es);
}

static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
@@ -479,8 +498,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
	EXT4_I(inode)->i_es_all_nr--;
	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);

	/* Decrease the shrink counter when this es is not delayed */
	if (!ext4_es_is_delayed(es)) {
	/* Decrease the shrink counter when we can reclaim the extent. */
	if (!ext4_es_must_keep(es)) {
		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
		if (!--EXT4_I(inode)->i_es_shk_nr)
			ext4_es_list_del(inode);
@@ -488,7 +507,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
					s_es_stats.es_stats_shk_cnt);
	}

	kmem_cache_free(ext4_es_cachep, es);
	__es_free_extent(es);
}

/*
@@ -749,7 +768,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode,
}
#endif

static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
			      struct extent_status *prealloc)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node **p = &tree->root.rb_node;
@@ -789,10 +809,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
		}
	}

	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
				  newes->es_pblk);
	if (prealloc)
		es = prealloc;
	else
		es = __es_alloc_extent(false);
	if (!es)
		return -ENOMEM;
	ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
			    newes->es_pblk);

	rb_link_node(&es->rb_node, parent, p);
	rb_insert_color(&es->rb_node, &tree->root);

@@ -804,26 +829,27 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
/*
 * ext4_es_insert_extent() adds information to an inode's extent
 * status tree.
 *
 * Return 0 on success, error code on failure.
 */
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
			   ext4_lblk_t len, ext4_fsblk_t pblk,
			   unsigned int status)
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err = 0;
	int err1 = 0;
	int err2 = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;

	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
		return 0;
		return;

	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);

	if (!len)
		return 0;
		return;

	BUG_ON(end < lblk);

@@ -842,29 +868,40 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,

	ext4_es_insert_extent_check(inode, &newes);

retry:
	if (err1 && !es1)
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	write_lock(&EXT4_I(inode)->i_es_lock);
	err = __es_remove_extent(inode, lblk, end, NULL);
	if (err != 0)

	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
	if (err1 != 0)
		goto error;

	err2 = __es_insert_extent(inode, &newes, es2);
	if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
		err2 = 0;
	if (err2 != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
					  128, EXT4_I(inode)))
		goto retry;
	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
		err = 0;

	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
	    (status & EXTENT_STATUS_WRITTEN ||
	     status & EXTENT_STATUS_UNWRITTEN))
		__revise_pending(inode, lblk, len);

	/* es is pre-allocated but not used, free it. */
	if (es1 && !es1->es_len)
		__es_free_extent(es1);
	if (es2 && !es2->es_len)
		__es_free_extent(es2);
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
		goto retry;

	ext4_es_print_tree(inode);

	return err;
	return;
}

/*
@@ -897,7 +934,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,

	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
	if (!es || es->es_lblk > end)
		__es_insert_extent(inode, &newes);
		__es_insert_extent(inode, &newes, NULL);
	write_unlock(&EXT4_I(inode)->i_es_lock);
}

@@ -1287,6 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
 * @lblk - first block in range
 * @end - last block in range
 * @reserved - number of cluster reservations released
 * @prealloc - pre-allocated es to avoid memory allocation failures
 *
 * If @reserved is not NULL and delayed allocation is enabled, counts
 * block/cluster reservations freed by removing range and if bigalloc
@@ -1294,7 +1332,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
 * error code on failure.
 */
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end, int *reserved)
			      ext4_lblk_t end, int *reserved,
			      struct extent_status *prealloc)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *node;
@@ -1302,14 +1341,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
	struct extent_status orig_es;
	ext4_lblk_t len1, len2;
	ext4_fsblk_t block;
	int err;
	int err = 0;
	bool count_reserved = true;
	struct rsvd_count rc;

	if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
		count_reserved = false;
retry:
	err = 0;

	es = __es_tree_search(&tree->root, lblk);
	if (!es)
@@ -1343,14 +1380,13 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
					orig_es.es_len - len2;
			ext4_es_store_pblock_status(&newes, block,
						    ext4_es_status(&orig_es));
			err = __es_insert_extent(inode, &newes);
			err = __es_insert_extent(inode, &newes, prealloc);
			if (err) {
				if (!ext4_es_must_keep(&newes))
					return 0;

				es->es_lblk = orig_es.es_lblk;
				es->es_len = orig_es.es_len;
				if ((err == -ENOMEM) &&
				    __es_shrink(EXT4_SB(inode->i_sb),
							128, EXT4_I(inode)))
					goto retry;
				goto out;
			}
		} else {
@@ -1422,39 +1458,48 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 * @len - number of blocks to remove
 *
 * Reduces block/cluster reservation count and for bigalloc cancels pending
 * reservations as needed. Returns 0 on success, error code on failure.
 * reservations as needed.
 */
int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			   ext4_lblk_t len)
{
	ext4_lblk_t end;
	int err = 0;
	int reserved = 0;
	struct extent_status *es = NULL;

	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
		return 0;
		return;

	trace_ext4_es_remove_extent(inode, lblk, len);
	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
		 lblk, len, inode->i_ino);

	if (!len)
		return err;
		return;

	end = lblk + len - 1;
	BUG_ON(end < lblk);

retry:
	if (err && !es)
		es = __es_alloc_extent(true);
	/*
	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
	 * so that we are sure __es_shrink() is done with the inode before it
	 * is reclaimed.
	 */
	write_lock(&EXT4_I(inode)->i_es_lock);
	err = __es_remove_extent(inode, lblk, end, &reserved);
	err = __es_remove_extent(inode, lblk, end, &reserved, es);
	if (es && !es->es_len)
		__es_free_extent(es);
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err)
		goto retry;

	ext4_es_print_tree(inode);
	ext4_da_release_space(inode, reserved);
	return err;
	return;
}

static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
@@ -1702,11 +1747,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,

		(*nr_to_scan)--;
		node = rb_next(&es->rb_node);
		/*
		 * We can't reclaim delayed extent from status tree because
		 * fiemap, bigallic, and seek_data/hole need to use it.
		 */
		if (ext4_es_is_delayed(es))

		if (ext4_es_must_keep(es))
			goto next;
		if (ext4_es_is_referenced(es)) {
			ext4_es_clear_referenced(es);
@@ -1770,7 +1812,7 @@ void ext4_clear_inode_es(struct inode *inode)
	while (node) {
		es = rb_entry(node, struct extent_status, rb_node);
		node = rb_next(node);
		if (!ext4_es_is_delayed(es)) {
		if (!ext4_es_must_keep(es)) {
			rb_erase(&es->rb_node, &tree->root);
			ext4_es_free_extent(inode, es);
		}
@@ -1972,17 +2014,18 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
 * @lblk - logical block to be added
 * @allocated - indicates whether a physical cluster has been allocated for
 *              the logical cluster that contains the block
 *
 * Returns 0 on success, negative error code on failure.
 */
int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
				  bool allocated)
{
	struct extent_status newes;
	int err = 0;
	int err1 = 0;
	int err2 = 0;
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;

	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
		return 0;
		return;

	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
		 lblk, inode->i_ino);
@@ -1994,29 +2037,37 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,

	ext4_es_insert_extent_check(inode, &newes);

retry:
	if (err1 && !es1)
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	write_lock(&EXT4_I(inode)->i_es_lock);

	err = __es_remove_extent(inode, lblk, lblk, NULL);
	if (err != 0)
	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
	if (err1 != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
					  128, EXT4_I(inode)))
		goto retry;
	if (err != 0)

	err2 = __es_insert_extent(inode, &newes, es2);
	if (err2 != 0)
		goto error;

	if (allocated)
		__insert_pending(inode, lblk);

	/* es is pre-allocated but not used, free it. */
	if (es1 && !es1->es_len)
		__es_free_extent(es1);
	if (es2 && !es2->es_len)
		__es_free_extent(es2);
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
		goto retry;

	ext4_es_print_tree(inode);
	ext4_print_pending_tree(inode);

	return err;
	return;
}

/*
+7 −7
Original line number Diff line number Diff line
@@ -127,13 +127,13 @@ extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);

extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
				  ext4_lblk_t len, ext4_fsblk_t pblk,
				  unsigned int status);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len, ext4_fsblk_t pblk,
				 unsigned int status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
extern void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
				  ext4_lblk_t len);
extern void ext4_es_find_extent_range(struct inode *inode,
				      int (*match_fn)(struct extent_status *es),
@@ -249,7 +249,7 @@ extern void ext4_exit_pending(void);
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
extern void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
					 bool allocated);
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
					ext4_lblk_t len);
Loading