Commit 57aff997 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ext4_for_linus-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Cleanup ext4's multi-block allocator, including adding some unit
  tests, as well as cleaning how we update the backup superblock after
  online resizes or updating the label or uuid.

  Optimize handling of released data blocks in ext4's commit machinery
  to avoid a potential lock contention on s_md_lock spinlock.

  Fix a number of ext4 bugs:

   - fix race between writepages and remount

   - fix racy may inline data check in dio write

   - add missed brelse in an error path in update_backups

   - fix umask handling when ACL support is disabled

   - fix lost EIO error when a journal commit races with a fsync of the
     blockdev

   - fix potential improper i_size when there is a crash right after an
     O_SYNC direct write.

   - check extent node for validity before potentially using what might
     be an invalid pointer

   - fix potential stale data exposure when writing to an unwritten
     extent and the file system is nearly out of space

   - fix potential accounting error around block reservations when
     writing partial delayed allocation writes to a bigalloc cluster

   - avoid memory allocation failure when tracking partial delayed
     allocation writes to a bigalloc cluster

   - fix various debugging print messages"

* tag 'ext4_for_linus-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (41 commits)
  ext4: properly sync file size update after O_SYNC direct IO
  ext4: fix racy may inline data check in dio write
  ext4: run mballoc test with different layouts setting
  ext4: add first unit test for ext4_mb_new_blocks_simple in mballoc
  ext4: add some kunit stub for mballoc kunit test
  ext4: call ext4_mb_mark_context in ext4_group_add_blocks()
  ext4: Separate block bitmap and buddy bitmap freeing in ext4_group_add_blocks()
  ext4: call ext4_mb_mark_context in ext4_mb_clear_bb
  ext4: Separate block bitmap and buddy bitmap freeing in ext4_mb_clear_bb()
  ext4: call ext4_mb_mark_context in ext4_mb_mark_diskspace_used
  ext4: extend ext4_mb_mark_context to support allocation under journal
  ext4: call ext4_mb_mark_context in ext4_free_blocks_simple
  ext4: factor out codes to update block bitmap and group descriptor on disk from ext4_mb_mark_bb
  ext4: make state in ext4_mb_mark_bb to be bool
  jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
  ext4: apply umask if ACL support is disabled
  ext4: mark buffer new if it is unwritten to avoid stale data exposure
  ext4: move 'ix' sanity check to corrent position
  jbd2: fix printk format type for 'io_block' in do_one_pass()
  jbd2: print io_block if check data block checksum failed when do recovery
  ...
parents 91a683cd 91562895
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
	/* usually, the umask is applied by posix_acl_create(), but if
	   ext4 ACL support is disabled at compile time, we need to do
	   it here, because posix_acl_create() will never be called */
	inode->i_mode &= ~current_umask();

	return 0;
}
#endif  /* CONFIG_EXT4_FS_POSIX_ACL */
+12 −4
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "mballoc.h"

#include <trace/events/ext4.h>
#include <kunit/static_stub.h>

static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
					    ext4_group_t block_group);
@@ -111,10 +112,8 @@ static unsigned ext4_num_overhead_clusters(struct super_block *sb,
	itbl_blk_start = ext4_inode_table(sb, gdp);
	itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
	if (itbl_blk_start <= end && itbl_blk_end >= start) {
		itbl_blk_start = itbl_blk_start >= start ?
			itbl_blk_start : start;
		itbl_blk_end = itbl_blk_end <= end ?
			itbl_blk_end : end;
		itbl_blk_start = max(itbl_blk_start, start);
		itbl_blk_end = min(itbl_blk_end, end);

		itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
		itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);
@@ -274,6 +273,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh_p;

	KUNIT_STATIC_STUB_REDIRECT(ext4_get_group_desc,
				   sb, block_group, bh);

	if (block_group >= ngroups) {
		ext4_error(sb, "block_group >= groups_count - block_group = %u,"
			   " groups_count = %u", block_group, ngroups);
@@ -468,6 +470,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
	ext4_fsblk_t bitmap_blk;
	int err;

	KUNIT_STATIC_STUB_REDIRECT(ext4_read_block_bitmap_nowait,
				   sb, block_group, ignore_locked);

	desc = ext4_get_group_desc(sb, block_group, NULL);
	if (!desc)
		return ERR_PTR(-EFSCORRUPTED);
@@ -563,6 +568,9 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
{
	struct ext4_group_desc *desc;

	KUNIT_STATIC_STUB_REDIRECT(ext4_wait_block_bitmap,
				   sb, block_group, bh);

	if (!buffer_new(bh))
		return 0;
	desc = ext4_get_group_desc(sb, block_group, NULL);
+5 −3
Original line number Diff line number Diff line
@@ -1504,6 +1504,7 @@ struct ext4_sb_info {
	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
	struct buffer_head * s_sbh;	/* Buffer containing the super block */
	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
	/* Array of bh's for the block group descriptors */
	struct buffer_head * __rcu *s_group_desc;
	unsigned int s_mount_opt;
	unsigned int s_mount_opt2;
@@ -1574,7 +1575,7 @@ struct ext4_sb_info {
	unsigned int *s_mb_maxs;
	unsigned int s_group_info_size;
	unsigned int s_mb_free_pending;
	struct list_head s_freed_data_list;	/* List of blocks to be freed
	struct list_head s_freed_data_list[2];	/* List of blocks to be freed
						   after commit completed */
	struct list_head s_discard_list;
	struct work_struct s_discard_work;
@@ -1686,7 +1687,8 @@ struct ext4_sb_info {

	/*
	 * Barrier between writepages ops and changing any inode's JOURNAL_DATA
	 * or EXTENTS flag.
	 * or EXTENTS flag or between writepages ops and changing DELALLOC or
	 * DIOREAD_NOLOCK mount options on remount.
	 */
	struct percpu_rw_semaphore s_writepages_rwsem;
	struct dax_device *s_daxdev;
@@ -2934,7 +2936,7 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
		       int len, int state);
			    int len, bool state);
static inline bool ext4_mb_cr_expensive(enum criteria cr)
{
	return cr >= CR_GOAL_LEN_SLOW;
+7 −7
Original line number Diff line number Diff line
@@ -1010,6 +1010,11 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
		ix = curp->p_idx;
	}

	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
		return -EFSCORRUPTED;
	}

	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
	BUG_ON(len < 0);
	if (len > 0) {
@@ -1019,11 +1024,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
	}

	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
		return -EFSCORRUPTED;
	}

	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -6081,13 +6081,13 @@ int ext4_ext_clear_bb(struct inode *inode)
				for (j = 0; j < path->p_depth; j++) {

					ext4_mb_mark_bb(inode->i_sb,
							path[j].p_block, 1, 0);
							path[j].p_block, 1, false);
					ext4_fc_record_regions(inode->i_sb, inode->i_ino,
							0, path[j].p_block, 1, 1);
				}
				ext4_free_ext_path(path);
			}
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false);
			ext4_fc_record_regions(inode->i_sb, inode->i_ino,
					map.m_lblk, map.m_pblk, map.m_len, 1);
		}
+91 −36
Original line number Diff line number Diff line
@@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len);
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			    ext4_lblk_t len,
			    struct pending_reservation **prealloc);

int __init ext4_init_es(void)
{
@@ -448,6 +449,19 @@ static void ext4_es_list_del(struct inode *inode)
	spin_unlock(&sbi->s_es_lock);
}

static inline struct pending_reservation *__alloc_pending(bool nofail)
{
	if (!nofail)
		return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);

	return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
}

static inline void __free_pending(struct pending_reservation *pr)
{
	kmem_cache_free(ext4_pending_cachep, pr);
}

/*
 * Returns true if we cannot fail to allocate memory for this extent_status
 * entry and cannot reclaim it until its status changes.
@@ -836,11 +850,12 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err1 = 0;
	int err2 = 0;
	int err1 = 0, err2 = 0, err3 = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;
	struct pending_reservation *pr = NULL;
	bool revise_pending = false;

	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
		return;
@@ -868,11 +883,17 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,

	ext4_es_insert_extent_check(inode, &newes);

	revise_pending = sbi->s_cluster_ratio > 1 &&
			 test_opt(inode->i_sb, DELALLOC) &&
			 (status & (EXTENT_STATUS_WRITTEN |
				    EXTENT_STATUS_UNWRITTEN));
retry:
	if (err1 && !es1)
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	if ((err1 || err2 || err3) && revise_pending && !pr)
		pr = __alloc_pending(true);
	write_lock(&EXT4_I(inode)->i_es_lock);

	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
@@ -897,13 +918,18 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
		es2 = NULL;
	}

	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
	    (status & EXTENT_STATUS_WRITTEN ||
	     status & EXTENT_STATUS_UNWRITTEN))
		__revise_pending(inode, lblk, len);
	if (revise_pending) {
		err3 = __revise_pending(inode, lblk, len, &pr);
		if (err3 != 0)
			goto error;
		if (pr) {
			__free_pending(pr);
			pr = NULL;
		}
	}
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
	if (err1 || err2 || err3)
		goto retry;

	ext4_es_print_tree(inode);
@@ -1311,7 +1337,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
				rc->ndelonly--;
				node = rb_next(&pr->rb_node);
				rb_erase(&pr->rb_node, &tree->root);
				kmem_cache_free(ext4_pending_cachep, pr);
				__free_pending(pr);
				if (!node)
					break;
				pr = rb_entry(node, struct pending_reservation,
@@ -1405,8 +1431,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			}
		}
		if (count_reserved)
			count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
				   &orig_es, &rc);
			count_rsvd(inode, orig_es.es_lblk + len1,
				   orig_es.es_len - len1 - len2, &orig_es, &rc);
		goto out_get_reserved;
	}

@@ -1907,11 +1933,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster to be added
 * @prealloc - preallocated pending entry
 *
 * Returns 0 on successful insertion and -ENOMEM on failure.  If the
 * pending reservation is already in the set, returns successfully.
 */
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
			    struct pending_reservation **prealloc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1937,11 +1965,16 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
		}
	}

	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
	if (pr == NULL) {
	if (likely(*prealloc == NULL)) {
		pr = __alloc_pending(false);
		if (!pr) {
			ret = -ENOMEM;
			goto out;
		}
	} else {
		pr = *prealloc;
		*prealloc = NULL;
	}
	pr->lclu = lclu;

	rb_link_node(&pr->rb_node, parent, p);
@@ -1970,7 +2003,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
	if (pr != NULL) {
		tree = &EXT4_I(inode)->i_pending_tree;
		rb_erase(&pr->rb_node, &tree->root);
		kmem_cache_free(ext4_pending_cachep, pr);
		__free_pending(pr);
	}
}

@@ -2029,10 +2062,10 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
				  bool allocated)
{
	struct extent_status newes;
	int err1 = 0;
	int err2 = 0;
	int err1 = 0, err2 = 0, err3 = 0;
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;
	struct pending_reservation *pr = NULL;

	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
		return;
@@ -2052,6 +2085,8 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	if ((err1 || err2 || err3) && allocated && !pr)
		pr = __alloc_pending(true);
	write_lock(&EXT4_I(inode)->i_es_lock);

	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
@@ -2074,11 +2109,18 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
		es2 = NULL;
	}

	if (allocated)
		__insert_pending(inode, lblk);
	if (allocated) {
		err3 = __insert_pending(inode, lblk, &pr);
		if (err3 != 0)
			goto error;
		if (pr) {
			__free_pending(pr);
			pr = NULL;
		}
	}
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
	if (err1 || err2 || err3)
		goto retry;

	ext4_es_print_tree(inode);
@@ -2184,21 +2226,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
 * @inode - file containing the range
 * @lblk - logical block defining the start of range
 * @len  - length of range in blocks
 * @prealloc - preallocated pending entry
 *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status.  Must be called while holding i_es_lock.
 */
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len)
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			    ext4_lblk_t len,
			    struct pending_reservation **prealloc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t end = lblk + len - 1;
	ext4_lblk_t first, last;
	bool f_del = false, l_del = false;
	int ret = 0;

	if (len == 0)
		return;
		return 0;

	/*
	 * Two cases - block range within single cluster and block range
@@ -2219,7 +2264,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del) {
			__insert_pending(inode, first);
			ret = __insert_pending(inode, first, prealloc);
			if (ret < 0)
				goto out;
		} else {
			last = EXT4_LBLK_CMASK(sbi, end) +
			       sbi->s_cluster_ratio - 1;
@@ -2227,9 +2274,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
				l_del = __es_scan_range(inode,
							&ext4_es_is_delonly,
							end + 1, last);
			if (l_del)
				__insert_pending(inode, last);
			else
			if (l_del) {
				ret = __insert_pending(inode, last, prealloc);
				if (ret < 0)
					goto out;
			} else
				__remove_pending(inode, last);
		}
	} else {
@@ -2237,18 +2286,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del)
			__insert_pending(inode, first);
		else
		if (f_del) {
			ret = __insert_pending(inode, first, prealloc);
			if (ret < 0)
				goto out;
		} else
			__remove_pending(inode, first);

		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
		if (last != end)
			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
						end + 1, last);
		if (l_del)
			__insert_pending(inode, last);
		else
		if (l_del) {
			ret = __insert_pending(inode, last, prealloc);
			if (ret < 0)
				goto out;
		} else
			__remove_pending(inode, last);
	}
out:
	return ret;
}
Loading