Commit 6020c204 authored by Linus Torvalds
Browse files
Pull folio conversion updates from Matthew Wilcox:
 "Convert much of the page cache to use folios

  This stops just short of actually enabling large folios. It converts
  everything that I noticed needs to be converted, but there may still
  be places I've overlooked which still have page size assumptions.

  The big change here is using large entries in the page cache XArray
  instead of many small entries. That only affects shmem for now, but
  it's a pretty big change for shmem since it changes where memory needs
  to be allocated (at split time instead of insertion)"

* tag 'folio-5.17' of git://git.infradead.org/users/willy/pagecache: (49 commits)
  mm: Use multi-index entries in the page cache
  XArray: Add xas_advance()
  truncate,shmem: Handle truncates that split large folios
  truncate: Convert invalidate_inode_pages2_range to folios
  fs: Convert vfs_dedupe_file_range_compare to folios
  mm: Remove pagevec_remove_exceptionals()
  mm: Convert find_lock_entries() to use a folio_batch
  filemap: Return only folios from find_get_entries()
  filemap: Convert filemap_get_read_batch() to use a folio_batch
  filemap: Convert filemap_read() to use a folio
  truncate: Add invalidate_complete_folio2()
  truncate: Convert invalidate_inode_pages2_range() to use a folio
  truncate: Skip known-truncated indices
  truncate,shmem: Add truncate_inode_folio()
  shmem: Convert part of shmem_undo_range() to use a folio
  mm: Add unmap_mapping_folio()
  truncate: Add truncate_cleanup_folio()
  filemap: Add filemap_release_folio()
  filemap: Use a folio in filemap_page_mkwrite
  filemap: Use a folio in filemap_map_pages
  ...
parents 81ff0be4 6b24ca4a
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include <linux/fscrypt.h>
#include <linux/fsverity.h>

struct pagevec;

#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition)	BUG_ON(condition)
#else
+13 −11
Original line number Diff line number Diff line
@@ -372,7 +372,7 @@ static bool inode_do_switch_wbs(struct inode *inode,
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE(xas, &mapping->i_pages, 0);
	struct page *page;
	struct folio *folio;
	bool switched = false;

	spin_lock(&inode->i_lock);
@@ -389,21 +389,23 @@ static bool inode_do_switch_wbs(struct inode *inode,

	/*
	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
	 * pages actually under writeback.
	 * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to
	 * folios actually under writeback.
	 */
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		if (PageDirty(page)) {
			dec_wb_stat(old_wb, WB_RECLAIMABLE);
			inc_wb_stat(new_wb, WB_RECLAIMABLE);
	xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		if (folio_test_dirty(folio)) {
			long nr = folio_nr_pages(folio);
			wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr);
			wb_stat_mod(new_wb, WB_RECLAIMABLE, nr);
		}
	}

	xas_set(&xas, 0);
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		WARN_ON_ONCE(!PageWriteback(page));
		dec_wb_stat(old_wb, WB_WRITEBACK);
		inc_wb_stat(new_wb, WB_WRITEBACK);
	xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		long nr = folio_nr_pages(folio);
		WARN_ON_ONCE(!folio_test_writeback(folio));
		wb_stat_mod(old_wb, WB_WRITEBACK, -nr);
		wb_stat_mod(new_wb, WB_WRITEBACK, nr);
	}

	if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+55 −61
Original line number Diff line number Diff line
@@ -146,41 +146,41 @@ static int generic_remap_check_len(struct inode *inode_in,
}

/* Read a page's worth of file data into the page cache. */
static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos)
{
	struct page *page;
	struct folio *folio;

	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
	if (IS_ERR(page))
		return page;
	if (!PageUptodate(page)) {
		put_page(page);
	folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL);
	if (IS_ERR(folio))
		return folio;
	if (!folio_test_uptodate(folio)) {
		folio_put(folio);
		return ERR_PTR(-EIO);
	}
	return page;
	return folio;
}

/*
 * Lock two pages, ensuring that we lock in offset order if the pages are from
 * the same file.
 * Lock two folios, ensuring that we lock in offset order if the folios
 * are from the same file.
 */
static void vfs_lock_two_pages(struct page *page1, struct page *page2)
static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2)
{
	/* Always lock in order of increasing index. */
	if (page1->index > page2->index)
		swap(page1, page2);
	if (folio1->index > folio2->index)
		swap(folio1, folio2);

	lock_page(page1);
	if (page1 != page2)
		lock_page(page2);
	folio_lock(folio1);
	if (folio1 != folio2)
		folio_lock(folio2);
}

/* Unlock two pages, being careful not to unlock the same page twice. */
static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
/* Unlock two folios, being careful not to unlock the same folio twice. */
static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2)
{
	unlock_page(page1);
	if (page1 != page2)
		unlock_page(page2);
	folio_unlock(folio1);
	if (folio1 != folio2)
		folio_unlock(folio2);
}

/*
@@ -188,77 +188,71 @@ static void vfs_unlock_two_pages(struct page *page1, struct page *page2)
 * Caller must have locked both inodes to prevent write races.
 */
static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
					 struct inode *dest, loff_t destoff,
					 struct inode *dest, loff_t dstoff,
					 loff_t len, bool *is_same)
{
	loff_t src_poff;
	loff_t dest_poff;
	void *src_addr;
	void *dest_addr;
	struct page *src_page;
	struct page *dest_page;
	loff_t cmp_len;
	bool same;
	int error;

	error = -EINVAL;
	same = true;
	bool same = true;
	int error = -EINVAL;

	while (len) {
		src_poff = srcoff & (PAGE_SIZE - 1);
		dest_poff = destoff & (PAGE_SIZE - 1);
		cmp_len = min(PAGE_SIZE - src_poff,
			      PAGE_SIZE - dest_poff);
		struct folio *src_folio, *dst_folio;
		void *src_addr, *dst_addr;
		loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff),
				     PAGE_SIZE - offset_in_page(dstoff));

		cmp_len = min(cmp_len, len);
		if (cmp_len <= 0)
			goto out_error;

		src_page = vfs_dedupe_get_page(src, srcoff);
		if (IS_ERR(src_page)) {
			error = PTR_ERR(src_page);
		src_folio = vfs_dedupe_get_folio(src, srcoff);
		if (IS_ERR(src_folio)) {
			error = PTR_ERR(src_folio);
			goto out_error;
		}
		dest_page = vfs_dedupe_get_page(dest, destoff);
		if (IS_ERR(dest_page)) {
			error = PTR_ERR(dest_page);
			put_page(src_page);
		dst_folio = vfs_dedupe_get_folio(dest, dstoff);
		if (IS_ERR(dst_folio)) {
			error = PTR_ERR(dst_folio);
			folio_put(src_folio);
			goto out_error;
		}

		vfs_lock_two_pages(src_page, dest_page);
		vfs_lock_two_folios(src_folio, dst_folio);

		/*
		 * Now that we've locked both pages, make sure they're still
		 * Now that we've locked both folios, make sure they're still
		 * mapped to the file data we're interested in.  If not,
		 * someone is invalidating pages on us and we lose.
		 */
		if (!PageUptodate(src_page) || !PageUptodate(dest_page) ||
		    src_page->mapping != src->i_mapping ||
		    dest_page->mapping != dest->i_mapping) {
		if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) ||
		    src_folio->mapping != src->i_mapping ||
		    dst_folio->mapping != dest->i_mapping) {
			same = false;
			goto unlock;
		}

		src_addr = kmap_atomic(src_page);
		dest_addr = kmap_atomic(dest_page);
		src_addr = kmap_local_folio(src_folio,
					offset_in_folio(src_folio, srcoff));
		dst_addr = kmap_local_folio(dst_folio,
					offset_in_folio(dst_folio, dstoff));

		flush_dcache_page(src_page);
		flush_dcache_page(dest_page);
		flush_dcache_folio(src_folio);
		flush_dcache_folio(dst_folio);

		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
		if (memcmp(src_addr, dst_addr, cmp_len))
			same = false;

		kunmap_atomic(dest_addr);
		kunmap_atomic(src_addr);
		kunmap_local(dst_addr);
		kunmap_local(src_addr);
unlock:
		vfs_unlock_two_pages(src_page, dest_page);
		put_page(dest_page);
		put_page(src_page);
		vfs_unlock_two_folios(src_folio, dst_folio);
		folio_put(dst_folio);
		folio_put(src_folio);

		if (!same)
			break;

		srcoff += cmp_len;
		destoff += cmp_len;
		dstoff += cmp_len;
		len -= cmp_len;
	}

+14 −0
Original line number Diff line number Diff line
@@ -274,6 +274,15 @@ static inline int thp_nr_pages(struct page *page)
	return 1;
}

/**
 * folio_test_pmd_mappable - Can we map this folio with a PMD?
 * @folio: The folio to test
 *
 * Return: true if the folio's order is at least HPAGE_PMD_ORDER, false
 * otherwise.
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return folio_order(folio) >= HPAGE_PMD_ORDER;
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
@@ -339,6 +348,11 @@ static inline int thp_nr_pages(struct page *page)
	return 1;
}

/*
 * Stub variant of folio_test_pmd_mappable(): always answers "no", i.e. the
 * folio is never reported as mappable with a PMD.
 * NOTE(review): presumably the !CONFIG_TRANSPARENT_HUGEPAGE half of the
 * header; the enclosing #ifdef is outside this hunk -- confirm.
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
	return false;
}

static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
{
	return false;
+21 −47
Original line number Diff line number Diff line
@@ -714,6 +714,27 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
struct mmu_gather;
struct inode;

/*
 * compound_order - Look up the order recorded for a compound page.
 * @page: The page to query.
 *
 * Returns 0 for any page on which PageHead() is false.  For a head page,
 * returns the compound_order field kept in the struct page that immediately
 * follows it (page[1]).
 */
static inline unsigned int compound_order(struct page *page)
{
	if (!PageHead(page))
		return 0;
	/* The order is stored in the struct page right after the head. */
	return page[1].compound_order;
}

/**
 * folio_order - The allocation order of a folio.
 * @folio: The folio.
 *
 * A folio is composed of 2^order pages.  See get_order() for the definition
 * of order.
 *
 * Return: The order of the folio.
 */
static inline unsigned int folio_order(struct folio *folio)
{
	/* Delegate to compound_order() on the folio's embedded struct page. */
	return compound_order(&folio->page);
}

#include <linux/huge_mm.h>

/*
@@ -913,27 +934,6 @@ static inline void destroy_compound_page(struct page *page)
	compound_page_dtors[page[1].compound_dtor](page);
}

static inline unsigned int compound_order(struct page *page)
{
	if (!PageHead(page))
		return 0;
	return page[1].compound_order;
}

/**
 * folio_order - The allocation order of a folio.
 * @folio: The folio.
 *
 * A folio is composed of 2^order pages.  See get_order() for the definition
 * of order.
 *
 * Return: The order of the folio.
 */
static inline unsigned int folio_order(struct folio *folio)
{
	return compound_order(&folio->page);
}

static inline bool hpage_pincount_available(struct page *page)
{
	/*
@@ -1837,28 +1837,6 @@ static inline bool can_do_mlock(void) { return false; }
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);

/*
 * Parameter block passed down to zap_pte_range in exceptional cases.
 */
struct zap_details {
	struct address_space *zap_mapping;	/* Check page->mapping if set */
	struct page *single_page;		/* Locked page to be unmapped */
};

/*
 * We set details->zap_mappings when we want to unmap shared but keep private
 * pages. Return true if skip zapping this page, false otherwise.
 */
static inline bool
zap_skip_check_mapping(struct zap_details *details, struct page *page)
{
	if (!details || !page)
		return false;

	return details->zap_mapping &&
	    (details->zap_mapping != page_rmapping(page));
}

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
			     pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -1893,7 +1871,6 @@ extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
int invalidate_inode_page(struct page *page);

@@ -1904,7 +1881,6 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct mm_struct *mm,
			    unsigned long address, unsigned int fault_flags,
			    bool *unlocked);
void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -1925,7 +1901,6 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
	BUG();
	return -EFAULT;
}
static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
@@ -1982,7 +1957,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
			struct page **pages);
struct page *get_dump_page(unsigned long addr);

extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
			      unsigned int length);

Loading