Commit 7879d7af authored by Linus Torvalds
Browse files
Pull misc VFS updates from Christian Brauner:
 "This contains the usual selection of misc updates for this cycle.

  Features:

   - Add ext4 IOCB_DONTCACHE support

     This refactors the address_space_operations write_begin() and
     write_end() callbacks to take const struct kiocb * as their first
     argument, allowing IOCB flags such as IOCB_DONTCACHE to propagate
     to the filesystem's buffered I/O path.

     Ext4 is updated to implement handling of the IOCB_DONTCACHE flag
     and advertises support via the FOP_DONTCACHE file operation flag.

     Additionally, the i915 driver's shmem write paths are updated to
     bypass the legacy write_begin/write_end interface in favor of
     directly calling write_iter() with a constructed synchronous kiocb.
     Another i915 change replaces a manual write loop with
     kernel_write() during GEM shmem object creation.

  Cleanups:

   - don't duplicate vfs_open() in kernel_file_open()

   - proc_fd_getattr(): don't bother with S_ISDIR() check

   - fs/ecryptfs: replace snprintf with sysfs_emit in show function

   - vfs: Remove unnecessary list_for_each_entry_safe() from
     evict_inodes()

   - filelock: add new locks_wake_up_waiter() helper

   - fs: Remove three arguments from block_write_end()

   - VFS: change old_dir and new_dir in struct renamedata to dentrys

   - netfs: Remove unused declaration netfs_queue_write_request()

  Fixes:

   - eventpoll: Fix semi-unbounded recursion

   - eventpoll: fix sphinx documentation build warning

   - fs/read_write: Fix spelling typo

   - fs: annotate data race between poll_schedule_timeout() and
     pollwake()

   - fs/pipe: set FMODE_NOWAIT in create_pipe_files()

   - docs/vfs: update references to i_mutex to i_rwsem

   - fs/buffer: remove comment about hard sectorsize

   - fs/buffer: remove the min and max limit checks in __getblk_slow()

   - fs/libfs: don't assume blocksize <= PAGE_SIZE in
     generic_check_addressable

   - fs_context: fix parameter name in infofc() macro

   - fs: Prevent file descriptor table allocations exceeding INT_MAX"

* tag 'vfs-6.17-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (24 commits)
  netfs: Remove unused declaration netfs_queue_write_request()
  eventpoll: fix sphinx documentation build warning
  ext4: support uncached buffered I/O
  mm/pagemap: add write_begin_get_folio() helper function
  fs: change write_begin/write_end interface to take struct kiocb *
  drm/i915: Refactor shmem_pwrite() to use kiocb and write_iter
  drm/i915: Use kernel_write() in shmem object create
  eventpoll: Fix semi-unbounded recursion
  vfs: Remove unnecessary list_for_each_entry_safe() from evict_inodes()
  fs/libfs: don't assume blocksize <= PAGE_SIZE in generic_check_addressable
  fs/buffer: remove the min and max limit checks in __getblk_slow()
  fs: Prevent file descriptor table allocations exceeding INT_MAX
  fs: Remove three arguments from block_write_end()
  fs/ecryptfs: replace snprintf with sysfs_emit in show function
  fs: annotate suspected data race between poll_schedule_timeout() and pollwake()
  docs/vfs: update references to i_mutex to i_rwsem
  fs/buffer: remove comment about hard sectorsize
  fs_context: fix parameter name in infofc() macro
  VFS: change old_dir and new_dir in struct renamedata to dentrys
  proc_fd_getattr(): don't bother with S_ISDIR() check
  ...
parents 794cbac9 4e8fc4f7
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -253,10 +253,10 @@ prototypes::
	int (*writepages)(struct address_space *, struct writeback_control *);
	bool (*dirty_folio)(struct address_space *, struct folio *folio);
	void (*readahead)(struct readahead_control *);
	int (*write_begin)(struct file *, struct address_space *mapping,
	int (*write_begin)(const struct kiocb *, struct address_space *mapping,
				loff_t pos, unsigned len,
				struct folio **foliop, void **fsdata);
	int (*write_end)(struct file *, struct address_space *mapping,
	int (*write_end)(const struct kiocb *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct folio *folio, void *fsdata);
	sector_t (*bmap)(struct address_space *, sector_t);
+6 −5
Original line number Diff line number Diff line
@@ -758,8 +758,9 @@ process is more complicated and uses write_begin/write_end or
dirty_folio to write data into the address_space, and
writepages to writeback data to storage.

Adding and removing pages to/from an address_space is protected by the
inode's i_mutex.
Removing pages from an address_space requires holding the inode's i_rwsem
exclusively, while adding pages to the address_space requires holding the
inode's i_mapping->invalidate_lock exclusively.

When data is written to a page, the PG_Dirty flag should be set.  It
typically remains set until writepages asks for it to be written.  This
@@ -822,10 +823,10 @@ cache in your filesystem. The following members are defined:
		int (*writepages)(struct address_space *, struct writeback_control *);
		bool (*dirty_folio)(struct address_space *, struct folio *);
		void (*readahead)(struct readahead_control *);
		int (*write_begin)(struct file *, struct address_space *mapping,
		int (*write_begin)(const struct kiocb *, struct address_space *mapping,
				   loff_t pos, unsigned len,
				   struct folio **foliop, void **fsdata);
		int (*write_end)(struct file *, struct address_space *mapping,
		int (*write_end)(const struct kiocb *, struct address_space *mapping,
				 loff_t pos, unsigned len, unsigned copied,
				 struct folio *folio, void *fsdata);
		sector_t (*bmap)(struct address_space *, sector_t);
+9 −6
Original line number Diff line number Diff line
@@ -496,18 +496,21 @@ static void blkdev_readahead(struct readahead_control *rac)
	mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
static int blkdev_write_begin(const struct kiocb *iocb,
			      struct address_space *mapping, loff_t pos,
			      unsigned len, struct folio **foliop,
			      void **fsdata)
{
	return block_write_begin(mapping, pos, len, foliop, blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct folio *folio,
		void *fsdata)
static int blkdev_write_end(const struct kiocb *iocb,
			    struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned copied,
			    struct folio *folio, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
	ret = block_write_end(pos, len, copied, folio);

	folio_unlock(folio);
	folio_put(folio);
+31 −84
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/pagevec.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>
#include <linux/uio.h>

#include <drm/drm_cache.h>

@@ -400,12 +401,12 @@ static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain;
	loff_t pos;
	unsigned int pg;
	struct file *file = obj->base.filp;
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t written;
	u64 size = arg->size;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));
@@ -428,63 +429,24 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation; corruption will ensue -
	 * that is userspace's prerogative!
	 */

	remain = arg->size;
	pos = arg->offset;
	pg = offset_in_page(pos);

	do {
		unsigned int len, unwritten;
		struct folio *folio;
		void *data, *vaddr;
		int err;
		char __maybe_unused c;
	if (size > MAX_RW_COUNT)
		return -EFBIG;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;
	if (!file->f_op->write_iter)
		return -EINVAL;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;
	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = arg->offset;
	iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)user_data, size);

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;
	written = file->f_op->write_iter(&kiocb, &iter);
	BUG_ON(written == -EIOCBQUEUED);

		err = aops->write_begin(obj->base.filp, mapping, pos, len,
					&folio, &data);
		if (err < 0)
			return err;
	if (written != size)
		return -EIO;

		vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos));
		pagefault_disable();
		unwritten = __copy_from_user_inatomic(vaddr, user_data, len);
		pagefault_enable();
		kunmap_local(vaddr);

		err = aops->write_end(obj->base.filp, mapping, pos, len,
				      len - unwritten, folio, data);
		if (err < 0)
			return err;

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		pos += len;
		pg = 0;
	} while (remain);
	if (written < 0)
		return written;

	return 0;
}
@@ -637,9 +599,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	const struct address_space_operations *aops;
	loff_t pos;
	int err;
	loff_t pos = 0;
	ssize_t err;

	GEM_WARN_ON(IS_DGFX(i915));
	obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
@@ -649,29 +610,15 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	aops = file->f_mapping->a_ops;
	pos = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct folio *folio;
		void *fsdata;
	err = kernel_write(file, data, size, &pos);

		err = aops->write_begin(file, file->f_mapping, pos, len,
					&folio, &fsdata);
	if (err < 0)
		goto fail;

		memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len);

		err = aops->write_end(file, file->f_mapping, pos, len, len,
				      folio, fsdata);
		if (err < 0)
	if (err != size) {
		err = -EIO;
		goto fail;

		size -= len;
		data += len;
		pos += len;
	} while (size);
	}

	return obj;

+5 −4
Original line number Diff line number Diff line
@@ -53,13 +53,14 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to)
		truncate_pagecache(inode, inode->i_size);
}

static int adfs_write_begin(struct file *file, struct address_space *mapping,
static int adfs_write_begin(const struct kiocb *iocb,
			    struct address_space *mapping,
			    loff_t pos, unsigned len,
			    struct folio **foliop, void **fsdata)
{
	int ret;

	ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
	ret = cont_write_begin(iocb, mapping, pos, len, foliop, fsdata,
				adfs_get_block,
				&ADFS_I(mapping->host)->mmu_private);
	if (unlikely(ret))
Loading