Commit ff7dcfed authored by Linus Torvalds
Browse files

Merge tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Major ext4 changes for 6.17:

   - Better scalability for ext4 block allocation

   - Fix insufficient credits when writing back large folios

  Miscellaneous bug fixes, especially when handling extended attributes,
  inline data, and fast commit"

* tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (39 commits)
  ext4: do not BUG when INLINE_DATA_FL lacks system.data xattr
  ext4: implement linear-like traversal across order xarrays
  ext4: refactor choose group to scan group
  ext4: convert free groups order lists to xarrays
  ext4: factor out ext4_mb_scan_group()
  ext4: factor out ext4_mb_might_prefetch()
  ext4: factor out __ext4_mb_scan_group()
  ext4: fix largest free orders lists corruption on mb_optimize_scan switch
  ext4: fix zombie groups in average fragment size lists
  ext4: merge freed extent with existing extents before insertion
  ext4: convert sbi->s_mb_free_pending to atomic_t
  ext4: fix typo in CR_GOAL_LEN_SLOW comment
  ext4: get rid of some obsolete EXT4_MB_HINT flags
  ext4: utilize multiple global goals to reduce contention
  ext4: remove unnecessary s_md_lock on update s_mb_last_group
  ext4: remove unnecessary s_mb_last_start
  ext4: separate stream goal hits from s_bal_goals for better tracking
  ext4: add ext4_try_lock_group() to skip busy groups
  ext4: initialize superblock fields in the kballoc-test.c kunit tests
  ext4: refactor the inline directory conversion and new directory codepaths
  ...
parents 44a8c96e 099b847c
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -703,7 +703,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
	 * possible we just missed a transaction commit that did so
	 */
	smp_mb();
	if (sbi->s_mb_free_pending == 0) {
	if (atomic_read(&sbi->s_mb_free_pending) == 0) {
		if (test_opt(sb, DISCARD)) {
			atomic_inc(&sbi->s_retry_alloc_pending);
			flush_work(&sbi->s_discard_work);
+30 −44
Original line number Diff line number Diff line
@@ -157,7 +157,7 @@ enum criteria {

	/*
	 * Reads each block group sequentially, performing disk IO if
	 * necessary, to find find_suitable block group. Tries to
	 * necessary, to find suitable block group. Tries to
	 * allocate goal length but might trim the request if nothing
	 * is found after enough tries.
	 */
@@ -185,14 +185,8 @@ enum criteria {

/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE		0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED		0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA		0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST		0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST		0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA		0x0020
/* don't preallocate (for tails) */
@@ -213,15 +207,6 @@ enum criteria {
#define EXT4_MB_USE_RESERVED		0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK		0x4000
/* Large fragment size list lookup succeeded at least once for
 * CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED		0x8000
/* Avg fragment size rb tree lookup succeeded at least once for
 * CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED		0x00010000
/* Avg fragment size rb tree lookup succeeded at least once for
 * CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED		0x00020000

struct ext4_allocation_request {
	/* target inode for block we're allocating */
@@ -1608,16 +1593,14 @@ struct ext4_sb_info {
	unsigned short *s_mb_offsets;
	unsigned int *s_mb_maxs;
	unsigned int s_group_info_size;
	unsigned int s_mb_free_pending;
	atomic_t s_mb_free_pending;
	struct list_head s_freed_data_list[2];	/* List of blocks to be freed
						   after commit completed */
	struct list_head s_discard_list;
	struct work_struct s_discard_work;
	atomic_t s_retry_alloc_pending;
	struct list_head *s_mb_avg_fragment_size;
	rwlock_t *s_mb_avg_fragment_size_locks;
	struct list_head *s_mb_largest_free_orders;
	rwlock_t *s_mb_largest_free_orders_locks;
	struct xarray *s_mb_avg_fragment_size;
	struct xarray *s_mb_largest_free_orders;

	/* tunables */
	unsigned long s_stripe;
@@ -1629,15 +1612,16 @@ struct ext4_sb_info {
	unsigned int s_mb_order2_reqs;
	unsigned int s_mb_group_prealloc;
	unsigned int s_max_dir_size_kb;
	/* where last allocation was done - for stream allocation */
	unsigned long s_mb_last_group;
	unsigned long s_mb_last_start;
	unsigned int s_mb_prefetch;
	unsigned int s_mb_prefetch_limit;
	unsigned int s_mb_best_avail_max_trim_order;
	unsigned int s_sb_update_sec;
	unsigned int s_sb_update_kb;

	/* where last allocation was done - for stream allocation */
	ext4_group_t *s_mb_last_groups;
	unsigned int s_mb_nr_global_goals;

	/* stats for buddy allocator */
	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
	atomic_t s_bal_success;	/* we found long enough chunks */
@@ -1646,12 +1630,10 @@ struct ext4_sb_info {
	atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS];	/* total extents scanned */
	atomic_t s_bal_groups_scanned;	/* number of groups scanned */
	atomic_t s_bal_goals;	/* goal hits */
	atomic_t s_bal_stream_goals;	/* stream allocation global goal hits */
	atomic_t s_bal_len_goals;	/* len goal hits */
	atomic_t s_bal_breaks;	/* too long searches */
	atomic_t s_bal_2orders;	/* 2^order hits */
	atomic_t s_bal_p2_aligned_bad_suggestions;
	atomic_t s_bal_goal_fast_bad_suggestions;
	atomic_t s_bal_best_avail_bad_suggestions;
	atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
	atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS];		/* cX loop didn't find blocks */
@@ -3020,7 +3002,7 @@ int ext4_walk_page_buffers(handle_t *handle,
				     struct buffer_head *bh));
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
				struct buffer_head *bh);
bool ext4_should_enable_large_folio(struct inode *inode);
void ext4_set_inode_mapping_order(struct inode *inode);
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA	 2

@@ -3064,9 +3046,9 @@ extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
				  int pextents);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -3489,8 +3471,6 @@ struct ext4_group_info {
	void            *bb_bitmap;
#endif
	struct rw_semaphore alloc_sem;
	struct list_head bb_avg_fragment_size_node;
	struct list_head bb_largest_free_order_node;
	ext4_grpblk_t	bb_counters[];	/* Nr of free power-of-two-block
					 * regions, index is order.
					 * bb_counters[3] = 5 means
@@ -3541,23 +3521,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
	return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}

static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
{
	spinlock_t *lock = ext4_group_lock_ptr(sb, group);
	if (spin_trylock(lock))
	if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
		return false;
	/*
		 * We're able to grab the lock right away, so drop the
		 * lock contention counter.
	 * We're able to grab the lock right away, so drop the lock
	 * contention counter.
	 */
	atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
	else {
	return true;
}

static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
	if (!ext4_try_lock_group(sb, group)) {
		/*
		 * The lock is busy, so bump the contention counter,
		 * and then wait on the spin lock.
		 */
		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
				  EXT4_MAX_CONTENTION);
		spin_lock(lock);
		spin_lock(ext4_group_lock_ptr(sb, group));
	}
}

@@ -3612,6 +3597,7 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
extern void ext4_update_final_de(void *de_buf, int old_size, int new_size);

int ext4_readpage_inline(struct inode *inode, struct folio *folio);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
@@ -3671,10 +3657,10 @@ static inline int ext4_has_inline_data(struct inode *inode)
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
extern struct dentry *ext4_get_parent(struct dentry *child);
extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
				 struct ext4_dir_entry_2 *de,
				 int blocksize, int csum_size,
				 unsigned int parent_ino, int dotdot_real_len);
extern int ext4_init_dirblock(handle_t *handle, struct inode *inode,
			      struct buffer_head *dir_block,
			      unsigned int parent_ino, void *inline_buf,
			      int inline_size);
extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
					unsigned int blocksize);
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
+0 −7
Original line number Diff line number Diff line
@@ -30,13 +30,6 @@
 */
#define CHECK_BINSEARCH__

/*
 * If EXT_STATS is defined then stats numbers are collected.
 * These number will be displayed at umount time.
 */
#define EXT_STATS_


/*
 * ext4_inode has i_block array (60 bytes total).
 * The first 12 bytes store ext4_extent_header;
+3 −3
Original line number Diff line number Diff line
@@ -5215,7 +5215,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
				credits = depth + 2;
			}

			restart_credits = ext4_writepage_trans_blocks(inode);
			restart_credits = ext4_chunk_trans_extent(inode, 0);
			err = ext4_datasem_ensure_credits(handle, inode, credits,
					restart_credits, 0);
			if (err) {
@@ -5475,7 +5475,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)

	truncate_pagecache(inode, start);

	credits = ext4_writepage_trans_blocks(inode);
	credits = ext4_chunk_trans_extent(inode, 0);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
@@ -5571,7 +5571,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)

	truncate_pagecache(inode, start);

	credits = ext4_writepage_trans_blocks(inode);
	credits = ext4_chunk_trans_extent(inode, 0);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
+1 −2
Original line number Diff line number Diff line
@@ -1335,8 +1335,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
		}
	}

	if (ext4_should_enable_large_folio(inode))
		mapping_set_large_folios(inode->i_mapping);
	ext4_set_inode_mapping_order(inode);

	ext4_update_inode_fsync_trans(handle, inode, 1);

Loading