Commit 6fda0bb8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'mm-hotfixes-stable-2023-04-07-16-23' of...

Merge tag 'mm-hotfixes-stable-2023-04-07-16-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull MM fixes from Andrew Morton:
 "28 hotfixes.

  23 are cc:stable and the other five address issues which were
  introduced during this merge cycle.

  20 are for MM and the remainder are for other subsystems"

* tag 'mm-hotfixes-stable-2023-04-07-16-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (28 commits)
  maple_tree: fix a potential concurrency bug in RCU mode
  maple_tree: fix get wrong data_end in mtree_lookup_walk()
  mm/swap: fix swap_info_struct race between swapoff and get_swap_pages()
  nilfs2: fix sysfs interface lifetime
  mm: take a page reference when removing device exclusive entries
  mm: vmalloc: avoid warn_alloc noise caused by fatal signal
  nilfs2: initialize "struct nilfs_binfo_dat"->bi_pad field
  nilfs2: fix potential UAF of struct nilfs_sc_info in nilfs_segctor_thread()
  zsmalloc: document freeable stats
  zsmalloc: document new fullness grouping
  fsdax: force clear dirty mark if CoW
  mm/hugetlb: fix uffd wr-protection for CoW optimization path
  mm: enable maple tree RCU mode by default
  maple_tree: add RCU lock checking to rcu callback functions
  maple_tree: add smp_rmb() to dead node detection
  maple_tree: fix write memory barrier of nodes once dead for RCU mode
  maple_tree: remove extra smp_wmb() from mas_dead_leaves()
  maple_tree: fix freeing of nodes in rcu mode
  maple_tree: detect dead nodes in mas_start()
  maple_tree: be more cautious about dead nodes
  ...
parents aa318c48 c45ea315
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -265,7 +265,9 @@ Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
Leonard Crestez <leonard.crestez@nxp.com> Leonard Crestez <cdleonard@gmail.com>
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
Leonard Göhrs <l.goehrs@pengutronix.de>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
+76 −59
Original line number Diff line number Diff line
@@ -39,13 +39,12 @@ With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via

 # cat /sys/kernel/debug/zsmalloc/zram0/classes

 class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage
 class  size       10%       20%       30%       40%       50%       60%       70%       80%       90%       99%      100% obj_allocated   obj_used pages_used pages_per_zspage freeable
    ...
    ...
     9   176           0            1           186        129          8                4
    10   192           1            0          2880       2872        135                3
    11   208           0            1           819        795         42                2
    12   224           0            1           219        159         12                4
    30   512         0        12         4         1         0         1         0         0         1         0       414          3464       3346        433                1       14
    31   528         2         7         2         2         1         0         1         0         0         2       117          4154       3793        536                4       44
    32   544         6         3         4         1         2         1         0         0         0         1       260          4170       3965        556                2       26
    ...
    ...

@@ -54,10 +53,28 @@ class
	index
size
	object size zspage stores
almost_empty
	the number of ZS_ALMOST_EMPTY zspages(see below)
almost_full
	the number of ZS_ALMOST_FULL zspages(see below)
10%
	the number of zspages with usage ratio less than 10% (see below)
20%
	the number of zspages with usage ratio between 10% and 20%
30%
	the number of zspages with usage ratio between 20% and 30%
40%
	the number of zspages with usage ratio between 30% and 40%
50%
	the number of zspages with usage ratio between 40% and 50%
60%
	the number of zspages with usage ratio between 50% and 60%
70%
	the number of zspages with usage ratio between 60% and 70%
80%
	the number of zspages with usage ratio between 70% and 80%
90%
	the number of zspages with usage ratio between 80% and 90%
99%
	the number of zspages with usage ratio between 90% and 99%
100%
	the number of zspages with usage ratio 100%
obj_allocated
	the number of objects allocated
obj_used
@@ -66,19 +83,14 @@ pages_used
	the number of pages allocated for the class
pages_per_zspage
	the number of 0-order pages to make a zspage
freeable
	the approximate number of pages that class compaction can free

We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where

* n = number of allocated objects
* N = total number of objects zspage can store
* f = fullness_threshold_frac(ie, 4 at the moment)

Similarly, we assign zspage to:

* ZS_ALMOST_FULL  when n > N / f
* ZS_EMPTY        when n == 0
* ZS_FULL         when n == N

Each zspage maintains an inuse counter which keeps track of the number of
objects stored in the zspage.  The inuse counter determines the zspage's
"fullness group", which is calculated as the ratio of the "inuse" objects to
the total number of objects the zspage can hold (objs_per_zspage). The
closer the inuse counter is to objs_per_zspage, the better.

Internals
=========
@@ -94,10 +106,10 @@ of objects that each zspage can store.

For instance, consider the following size classes:::

  class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
  class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable
  ...
     94  1536           0            0             0          0          0                3        0
    100  1632           0            0             0          0          0                2        0
     94  1536        0    ....       0             0          0          0                3        0
    100  1632        0    ....       0             0          0          0                2        0
  ...


@@ -134,10 +146,11 @@ reduces memory wastage.

Let's take a closer look at the bottom of `/sys/kernel/debug/zsmalloc/zramX/classes`:::

  class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
  class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable

  ...
    202  3264           0            0             0          0          0                4        0
    254  4096           0            0             0          0          0                1        0
    202  3264         0   ..         0             0          0          0                4        0
    254  4096         0   ..         0             0          0          0                1        0
  ...

Size class #202 stores objects of size 3264 bytes and has a maximum of 4 pages
@@ -151,40 +164,42 @@ efficient storage of large objects.

For zspage chain size of 8, huge class watermark becomes 3632 bytes:::

  class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
  class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable

  ...
    202  3264           0            0             0          0          0                4        0
    211  3408           0            0             0          0          0                5        0
    217  3504           0            0             0          0          0                6        0
    222  3584           0            0             0          0          0                7        0
    225  3632           0            0             0          0          0                8        0
    254  4096           0            0             0          0          0                1        0
    202  3264         0   ..         0             0          0          0                4        0
    211  3408         0   ..         0             0          0          0                5        0
    217  3504         0   ..         0             0          0          0                6        0
    222  3584         0   ..         0             0          0          0                7        0
    225  3632         0   ..         0             0          0          0                8        0
    254  4096         0   ..         0             0          0          0                1        0
  ...

For zspage chain size of 16, huge class watermark becomes 3840 bytes:::

  class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
  class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable

  ...
    202  3264           0            0             0          0          0                4        0
    206  3328           0            0             0          0          0               13        0
    207  3344           0            0             0          0          0                9        0
    208  3360           0            0             0          0          0               14        0
    211  3408           0            0             0          0          0                5        0
    212  3424           0            0             0          0          0               16        0
    214  3456           0            0             0          0          0               11        0
    217  3504           0            0             0          0          0                6        0
    219  3536           0            0             0          0          0               13        0
    222  3584           0            0             0          0          0                7        0
    223  3600           0            0             0          0          0               15        0
    225  3632           0            0             0          0          0                8        0
    228  3680           0            0             0          0          0                9        0
    230  3712           0            0             0          0          0               10        0
    232  3744           0            0             0          0          0               11        0
    234  3776           0            0             0          0          0               12        0
    235  3792           0            0             0          0          0               13        0
    236  3808           0            0             0          0          0               14        0
    238  3840           0            0             0          0          0               15        0
    254  4096           0            0             0          0          0                1        0
    202  3264         0   ..         0             0          0          0                4        0
    206  3328         0   ..         0             0          0          0               13        0
    207  3344         0   ..         0             0          0          0                9        0
    208  3360         0   ..         0             0          0          0               14        0
    211  3408         0   ..         0             0          0          0                5        0
    212  3424         0   ..         0             0          0          0               16        0
    214  3456         0   ..         0             0          0          0               11        0
    217  3504         0   ..         0             0          0          0                6        0
    219  3536         0   ..         0             0          0          0               13        0
    222  3584         0   ..         0             0          0          0                7        0
    223  3600         0   ..         0             0          0          0               15        0
    225  3632         0   ..         0             0          0          0                8        0
    228  3680         0   ..         0             0          0          0                9        0
    230  3712         0   ..         0             0          0          0               10        0
    232  3744         0   ..         0             0          0          0               11        0
    234  3776         0   ..         0             0          0          0               12        0
    235  3792         0   ..         0             0          0          0               13        0
    236  3808         0   ..         0             0          0          0               14        0
    238  3840         0   ..         0             0          0          0               15        0
    254  4096         0   ..         0             0          0          0                1        0
  ...

Overall the combined zspage chain size effect on zsmalloc pool configuration:::
@@ -214,9 +229,10 @@ zram as a build artifacts storage (Linux kernel compilation).

  zsmalloc classes stats:::

    class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
    class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable

    ...
    Total                13           51        413836     412973     159955                         3
    Total              13   ..        51        413836     412973     159955                         3

  zram mm_stat:::

@@ -227,9 +243,10 @@ zram as a build artifacts storage (Linux kernel compilation).

  zsmalloc classes stats:::

    class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage freeable
    class  size       10%   ....    100% obj_allocated   obj_used pages_used pages_per_zspage freeable

    ...
    Total                18           87        414852     412978     156666                         0
    Total              18   ..        87        414852     412978     156666                         0

  zram mm_stat:::

+47 −5
Original line number Diff line number Diff line
@@ -781,6 +781,33 @@ static int __dax_invalidate_entry(struct address_space *mapping,
	return ret;
}

/*
 * Clear the PAGECACHE_TAG_DIRTY and PAGECACHE_TAG_TOWRITE marks on each
 * entry found in @mapping->i_pages while iterating from index @start up to
 * @end.
 *
 * The walk runs under xas_lock_irq(); after every XA_CHECK_SCHED entries the
 * lock is dropped around a cond_resched() call and then re-taken.
 *
 * Always returns 0.
 */
static int __dax_clear_dirty_range(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	unsigned int scanned = 0;	/* entries visited so far */
	void *entry;

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, end) {
		/*
		 * NOTE(review): presumably this waits until the entry is not
		 * locked by someone else -- confirm against
		 * get_unlocked_entry().
		 */
		entry = get_unlocked_entry(&xas, 0);
		xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
		xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		put_unlocked_entry(&xas, entry, WAKE_NEXT);

		/* Only fall through to the resched path every XA_CHECK_SCHED entries. */
		if (++scanned % XA_CHECK_SCHED)
			continue;

		/* Pause the iteration before dropping the lock to reschedule. */
		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);

	return 0;
}

/*
 * Delete DAX entry at @index from @mapping.  Wait for it
 * to be unlocked before deleting it.
@@ -1258,15 +1285,20 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
	/* don't bother with blocks that are not shared to start with */
	if (!(iomap->flags & IOMAP_F_SHARED))
		return length;
	/* don't bother with holes or unwritten extents */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
		return length;

	id = dax_read_lock();
	ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
	if (ret < 0)
		goto out_unlock;

	/* zero the distance if srcmap is HOLE or UNWRITTEN */
	if (srcmap->flags & IOMAP_F_SHARED || srcmap->type == IOMAP_UNWRITTEN) {
		memset(daddr, 0, length);
		dax_flush(iomap->dax_dev, daddr, length);
		ret = length;
		goto out_unlock;
	}

	ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
	if (ret < 0)
		goto out_unlock;
@@ -1435,6 +1467,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
	 * written by write(2) is visible in mmap.
	 */
	if (iomap->flags & IOMAP_F_NEW || cow) {
		/*
		 * Filesystem allows CoW on non-shared extents. The src extents
		 * may have been mmapped with dirty mark before. To be able to
		 * invalidate its dax entries, we need to clear the dirty mark
		 * in advance.
		 */
		if (cow)
			__dax_clear_dirty_range(iomi->inode->i_mapping,
						pos >> PAGE_SHIFT,
						(end - 1) >> PAGE_SHIFT);
		invalidate_inode_pages2_range(iomi->inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
@@ -2022,8 +2064,8 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,

	while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
	       (ret = iomap_iter(&dst_iter, ops)) > 0) {
		compared = dax_range_compare_iter(&src_iter, &dst_iter, len,
						  same);
		compared = dax_range_compare_iter(&src_iter, &dst_iter,
				min(src_iter.len, dst_iter.len), same);
		if (compared < 0)
			return ret;
		src_iter.processed = dst_iter.processed = compared;
+1 −0
Original line number Diff line number Diff line
@@ -2219,6 +2219,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = level;
	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));

	return 0;
}
+1 −0
Original line number Diff line number Diff line
@@ -314,6 +314,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,

	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = 0;
	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));

	return 0;
}
Loading