Commit f36f3010 authored by Chunhai Guo, committed by Gao Xiang
Browse files

erofs: rename per-CPU buffers to global buffer pool and make it configurable



It will cost more time if compressed buffers are allocated on demand for
low-latency algorithms (like lz4), so EROFS uses per-CPU buffers to keep
compressed data if in-place decompression is unfulfilled.  However, this is
rather wasteful of memory on a device with hundreds of CPUs, since only a
small number of CPUs decompress concurrently most of the time.

This patch renames it as 'global buffer pool' and makes it configurable.
This allows two or more CPUs to share a common buffer to reduce memory
occupation.

Suggested-by: Gao Xiang <xiang@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
Link: https://lore.kernel.org/r/20240402100036.2673604-1-guochunhai@vivo.com


Signed-off-by: Sandeep Dhavale <dhavale@google.com>
Link: https://lore.kernel.org/r/20240408215231.3376659-1-dhavale@google.com


Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
parent cacd5b04
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
+3 −3
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
	sbi->lz4.max_distance_pages = distance ?
					DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
					LZ4_MAX_DISTANCE_PAGES;
	return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
	return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
}

/*
@@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
docopy:
	/* Or copy compressed data which can be overlapped to per-CPU buffer */
	in = rq->in;
	src = erofs_get_pcpubuf(ctx->inpages);
	src = z_erofs_get_gbuf(ctx->inpages);
	if (!src) {
		DBG_BUGON(1);
		kunmap_local(inpage);
@@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
	} else if (maptype == 1) {
		vm_unmap_ram(src, ctx->inpages);
	} else if (maptype == 2) {
		erofs_put_pcpubuf(src);
		z_erofs_put_gbuf(src);
	} else if (maptype != 3) {
		DBG_BUGON(1);
		return -EFAULT;
+7 −7
Original line number Diff line number Diff line
@@ -463,11 +463,11 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
					struct erofs_workgroup *egrp);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
			    int flags);
void *erofs_get_pcpubuf(unsigned int requiredpages);
void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void __init erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
void *z_erofs_get_gbuf(unsigned int requiredpages);
void z_erofs_put_gbuf(void *ptr);
int z_erofs_gbuf_growsize(unsigned int nrpages);
int __init z_erofs_gbuf_init(void);
void z_erofs_gbuf_exit(void);
int erofs_init_managed_cache(struct super_block *sb);
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
#else
@@ -477,8 +477,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
static inline void erofs_exit_shrinker(void) {}
static inline int z_erofs_init_zip_subsystem(void) { return 0; }
static inline void z_erofs_exit_zip_subsystem(void) {}
static inline void erofs_pcpubuf_init(void) {}
static inline void erofs_pcpubuf_exit(void) {}
static inline int z_erofs_gbuf_init(void) { return 0; }
static inline void z_erofs_gbuf_exit(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif	/* !CONFIG_EROFS_FS_ZIP */

fs/erofs/pcpubuf.c

deleted100644 → 0
+0 −148
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Gao Xiang <xiang@kernel.org>
 *
 * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
 * per-CPU virtual memory (in pages) in advance to store such inplace I/O
 * data if inplace decompression fails (due to unmet inplace margin for
 * example).
 */
#include "internal.h"

/*
 * Per-CPU buffer descriptor.  One instance exists for every CPU (see
 * erofs_pcb below); erofs_pcpubuf_growsize() fills it in and
 * erofs_get_pcpubuf()/erofs_put_pcpubuf() hand the mapping out.
 */
struct erofs_pcpubuf {
	/* taken in erofs_get_pcpubuf(), dropped in erofs_put_pcpubuf() */
	raw_spinlock_t lock;
	/* vmap()ed address covering @pages; this is what the getter returns */
	void *ptr;
	/* kmalloc'ed array of the pages backing @ptr */
	struct page **pages;
	/* number of entries in @pages, i.e. the buffer size in pages */
	unsigned int nrpages;
};

/* the per-CPU instances; locks are set up by erofs_pcpubuf_init() */
static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);

/*
 * Hand out the current CPU's buffer if it spans at least @requiredpages
 * pages.
 *
 * On success the buffer address is returned with pcb->lock held and the
 * get_cpu_var() reference still taken; both are dropped later by
 * erofs_put_pcpubuf().  If the buffer is too small, everything is undone
 * here and NULL is returned.
 */
void *erofs_get_pcpubuf(unsigned int requiredpages)
	__acquires(pcb->lock)
{
	struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);

	raw_spin_lock(&pcb->lock);
	if (requiredpages <= pcb->nrpages)
		return pcb->ptr;

	/* the per-CPU buffer is too small: back out completely */
	raw_spin_unlock(&pcb->lock);
	put_cpu_var(erofs_pcb);
	/* (for sparse checker) pretend pcb->lock is still taken */
	__acquire(pcb->lock);
	return NULL;
}

/*
 * Give back a buffer obtained from a successful erofs_get_pcpubuf() call.
 *
 * @ptr must be the address that call returned; debug builds verify it
 * against the current CPU's buffer.  The lock taken by the getter is
 * released and the matching put_cpu_var() is issued.
 */
void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
{
	struct erofs_pcpubuf *pcb;

	pcb = &per_cpu(erofs_pcb, smp_processor_id());
	DBG_BUGON(ptr != pcb->ptr);
	raw_spin_unlock(&pcb->lock);
	put_cpu_var(erofs_pcb);
}

/*
 * Grow the buffer of every possible CPU to @nrpages pages.
 *
 * Buffers are never shrunk: a request that is not larger than the size
 * already established is a no-op returning 0.  For each CPU a new page
 * array is allocated, mapped with vmap() and swapped in under pcb->lock;
 * the pages of the replaced buffer are recycled through a local page pool
 * for the next CPU.
 *
 * Returns 0 on success or -ENOMEM if an allocation or the mapping fails.
 * On failure the CPUs handled before the error keep their enlarged
 * buffers, but the recorded global size is left untouched so that a later
 * call with the same @nrpages retries instead of reporting success while
 * some CPUs are still too small.
 */
/* the next step: support per-CPU page buffers hotplug */
int erofs_pcpubuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(pcb_resize_mutex);
	static unsigned int pcb_nrpages;
	struct page *pagepool = NULL;
	int delta, cpu, ret, i;

	mutex_lock(&pcb_resize_mutex);
	delta = nrpages - pcb_nrpages;
	ret = 0;
	/* avoid shrinking pcpubuf, since no idea how many fses rely on */
	if (delta <= 0)
		goto out;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
		struct page **pages, **oldpages;
		void *ptr, *old_ptr;

		/*
		 * An earlier call that failed half-way may already have grown
		 * this CPU; don't redo (or shrink) it.  pcb->nrpages is only
		 * written in this function, under pcb_resize_mutex, so a plain
		 * read is fine here.
		 */
		if (pcb->nrpages >= nrpages)
			continue;

		pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
		if (!pages) {
			ret = -ENOMEM;
			break;
		}

		for (i = 0; i < nrpages; ++i) {
			pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
			if (!pages[i]) {
				ret = -ENOMEM;
				/* free the i pages obtained so far */
				oldpages = pages;
				goto free_pagearray;
			}
		}
		ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr) {
			ret = -ENOMEM;
			/* i == nrpages here: free the whole new array */
			oldpages = pages;
			goto free_pagearray;
		}
		/* publish the new buffer; remember the old one for teardown */
		raw_spin_lock(&pcb->lock);
		old_ptr = pcb->ptr;
		pcb->ptr = ptr;
		oldpages = pcb->pages;
		pcb->pages = pages;
		i = pcb->nrpages;
		pcb->nrpages = nrpages;
		raw_spin_unlock(&pcb->lock);

		if (!oldpages) {
			DBG_BUGON(old_ptr);
			continue;
		}

		if (old_ptr)
			vunmap(old_ptr);
free_pagearray:
		/* recycle the first i pages of oldpages[] via the pool */
		while (i)
			erofs_pagepool_add(&pagepool, oldpages[--i]);
		kfree(oldpages);
		if (ret)
			break;
	}
	/* only a fully successful pass establishes the new global size */
	if (!ret)
		pcb_nrpages = nrpages;
	erofs_release_pages(&pagepool);
out:
	mutex_unlock(&pcb_resize_mutex);
	return ret;
}

/*
 * Initialise the lock of every possible CPU's buffer descriptor.  No
 * memory is allocated here; buffers are attached later by
 * erofs_pcpubuf_growsize().
 */
void __init erofs_pcpubuf_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(&per_cpu(erofs_pcb, cpu).lock);
}

/*
 * Tear down every possible CPU's buffer: drop the vmap() mapping, release
 * each backing page and free the page array.  Descriptors that never got a
 * buffer are left alone.
 */
void erofs_pcpubuf_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
		struct page **pages = pcb->pages;
		unsigned int idx;

		if (pcb->ptr) {
			vunmap(pcb->ptr);
			pcb->ptr = NULL;
		}

		if (pages) {
			for (idx = 0; idx < pcb->nrpages; ++idx) {
				if (!pages[idx])
					continue;
				put_page(pages[idx]);
			}
			kfree(pages);
			pcb->pages = NULL;
		}
	}
}
+7 −2
Original line number Diff line number Diff line
@@ -859,7 +859,10 @@ static int __init erofs_module_init(void)
	if (err)
		goto deflate_err;

	erofs_pcpubuf_init();
	err = z_erofs_gbuf_init();
	if (err)
		goto gbuf_err;

	err = z_erofs_init_zip_subsystem();
	if (err)
		goto zip_err;
@@ -879,6 +882,8 @@ static int __init erofs_module_init(void)
sysfs_err:
	z_erofs_exit_zip_subsystem();
zip_err:
	z_erofs_gbuf_exit();
gbuf_err:
	z_erofs_deflate_exit();
deflate_err:
	z_erofs_lzma_exit();
@@ -902,7 +907,7 @@ static void __exit erofs_module_exit(void)
	z_erofs_lzma_exit();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
	z_erofs_gbuf_exit();
}

static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
Loading