Commit 1b0aabcc authored by Linus Torvalds
Browse files
Pull misc vfs updates from Christian Brauner:
 "This contains the usual miscellaneous features, cleanups, and fixes
  for vfs and individual fses.

  Features:

   - Free up FMODE_* bits. I've freed up bits 6, 7, 8, and 24. That
     means we now have six free FMODE_* bits in total (but bit #6
     already got used for FMODE_WRITE_RESTRICTED)

   - Add FOP_HUGE_PAGES flag (follow-up to FMODE_* cleanup)

   - Add fd_raw cleanup class so we can make use of automatic cleanup
     provided by CLASS(fd_raw, f)(fd) for O_PATH fds as well

   - Optimize seq_puts()

   - Simplify __seq_puts()

   - Add new anon_inode_getfile_fmode() api to allow specifying f_mode
     instead of open-coding it in multiple places

   - Annotate struct file_handle with __counted_by() and use
     struct_size()

   - Warn in get_file() when f_count resurrection from zero is
     attempted (epoll/drm discussion)

   - Folio-sophize aio

   - Export the subvolume id in statx() for both btrfs and bcachefs

   - Relax linkat(AT_EMPTY_PATH) requirements

   - Add F_DUPFD_QUERY fcntl() to compare two file descriptors for
     dup*() equality, replacing kcmp()

  Cleanups:

   - Compile out swapfile inode checks when swap isn't enabled

   - Use (1 << n) notation for FMODE_* bitshifts for clarity

   - Remove redundant variable assignment in fs/direct-io

   - Clean up uses of strncpy in orangefs

   - Speed up and clean up writeback

   - Move fsparam_string_empty() helper into header since it's currently
     open-coded in multiple places

   - Add kernel-doc comments to proc_create_net_data_write()

   - Don't needlessly read dentry->d_flags twice

  Fixes:

   - Fix out-of-range warning in nilfs2

   - Fix ecryptfs overflow due to wrong encryption packet size
     calculation

   - Fix overly long line in xfs file_operations (follow-up to FMODE_*
     cleanup)

   - Don't raise FOP_BUFFER_{R,W}ASYNC for directories in xfs (follow-up
     to FMODE_* cleanup)

   - Don't call xfs_file_open from xfs_dir_open (follow-up to FMODE_*
     cleanup)

   - Fix stable offset api to prevent endless loops

   - Fix afs file server rotations

   - Prevent xattr node from overflowing the eraseblock in jffs2

   - Move fdinfo PTRACE_MODE_READ procfs check into the .permission()
     operation instead of .open() operation since this caused userspace
     regressions"

* tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits)
  afs: Fix fileserver rotation getting stuck
  selftests: add F_DUPDFD_QUERY selftests
  fcntl: add F_DUPFD_QUERY fcntl()
  file: add fd_raw cleanup class
  fs: WARN when f_count resurrection is attempted
  seq_file: Simplify __seq_puts()
  seq_file: Optimize seq_puts()
  proc: Move fdinfo PTRACE_MODE_READ check into the inode .permission operation
  fs: Create anon_inode_getfile_fmode()
  xfs: don't call xfs_file_open from xfs_dir_open
  xfs: drop fop_flags for directories
  xfs: fix overly long line in the file_operations
  shmem: Fix shmem_rename2()
  libfs: Add simple_offset_rename() API
  libfs: Fix simple_offset_rename_exchange()
  jffs2: prevent xattr node from overflowing the eraseblock
  vfs, swap: compile out IS_SWAPFILE() on swapless configs
  vfs: relax linkat() AT_EMPTY_PATH - aka flink() - requirements
  fs/direct-io: remove redundant assignment to variable retval
  fs/dcache: Re-use value stored to dentry->d_flags instead of re-reading
  ...
parents c117a437 da0e01cc
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -912,7 +912,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
		disk_unblock_events(disk);

	bdev_file->f_flags |= O_LARGEFILE;
	bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
	bdev_file->f_mode |= FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		bdev_file->f_mode |= FMODE_NOWAIT;
	if (mode & BLK_OPEN_RESTRICT_WRITES)
+1 −0
Original line number Diff line number Diff line
@@ -863,6 +863,7 @@ const struct file_operations def_blk_fops = {
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
	.fop_flags	= FOP_BUFFER_RASYNC,
};

static __init int blkdev_init(void)
+1 −1
Original line number Diff line number Diff line
@@ -377,7 +377,7 @@ static const struct file_operations dax_fops = {
	.release = dax_release,
	.get_unmapped_area = dax_get_unmapped_area,
	.mmap = dax_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.fop_flags = FOP_MMAP_SYNC,
};

static void dev_dax_cdev_del(void *cdev)
+6 −2
Original line number Diff line number Diff line
@@ -541,11 +541,13 @@ bool afs_select_fileserver(struct afs_operation *op)
		    test_bit(AFS_SE_EXCLUDED, &se->flags) ||
		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
			continue;
		es = op->server_states->endpoint_state;
		es = op->server_states[i].endpoint_state;
		sal = es->addresses;

		afs_get_address_preferences_rcu(op->net, sal);
		for (j = 0; j < sal->nr_addrs; j++) {
			if (es->failed_set & (1 << j))
				continue;
			if (!sal->addrs[j].peer)
				continue;
			if (sal->addrs[j].prio > best_prio) {
@@ -605,6 +607,8 @@ bool afs_select_fileserver(struct afs_operation *op)
	best_prio = -1;
	addr_index = 0;
	for (i = 0; i < alist->nr_addrs; i++) {
		if (!(set & (1 << i)))
			continue;
		if (alist->addrs[i].prio > best_prio) {
			addr_index = i;
			best_prio = alist->addrs[i].prio;
@@ -674,7 +678,7 @@ bool afs_select_fileserver(struct afs_operation *op)
	for (i = 0; i < op->server_list->nr_servers; i++) {
		struct afs_endpoint_state *estate;

		estate = op->server_states->endpoint_state;
		estate = op->server_states[i].endpoint_state;
		error = READ_ONCE(estate->error);
		if (error < 0)
			afs_op_accumulate_error(op, error, estate->abort_code);
+47 −44
Original line number Diff line number Diff line
@@ -122,7 +122,7 @@ struct kioctx {
	unsigned long		mmap_base;
	unsigned long		mmap_size;

	struct page		**ring_pages;
	struct folio		**ring_folios;
	long			nr_pages;

	struct rcu_work		free_rwork;	/* see free_ioctx() */
@@ -160,7 +160,7 @@ struct kioctx {
		spinlock_t	completion_lock;
	} ____cacheline_aligned_in_smp;

	struct page		*internal_pages[AIO_RING_PAGES];
	struct folio		*internal_folios[AIO_RING_PAGES];
	struct file		*aio_ring_file;

	unsigned		id;
@@ -334,19 +334,20 @@ static void aio_free_ring(struct kioctx *ctx)
	put_aio_ring_file(ctx);

	for (i = 0; i < ctx->nr_pages; i++) {
		struct page *page;
		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
				page_count(ctx->ring_pages[i]));
		page = ctx->ring_pages[i];
		if (!page)
		struct folio *folio = ctx->ring_folios[i];

		if (!folio)
			continue;
		ctx->ring_pages[i] = NULL;
		put_page(page);

		pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
			 folio_ref_count(folio));
		ctx->ring_folios[i] = NULL;
		folio_put(folio);
	}

	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
		kfree(ctx->ring_pages);
		ctx->ring_pages = NULL;
	if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) {
		kfree(ctx->ring_folios);
		ctx->ring_folios = NULL;
	}
}

@@ -441,7 +442,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
	idx = src->index;
	if (idx < (pgoff_t)ctx->nr_pages) {
		/* Make sure the old folio hasn't already been changed */
		if (ctx->ring_pages[idx] != &src->page)
		if (ctx->ring_folios[idx] != src)
			rc = -EAGAIN;
	} else
		rc = -EINVAL;
@@ -465,8 +466,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst,
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);
	folio_migrate_copy(dst, src);
	BUG_ON(ctx->ring_pages[idx] != &src->page);
	ctx->ring_pages[idx] = &dst->page;
	BUG_ON(ctx->ring_folios[idx] != src);
	ctx->ring_folios[idx] = dst;
	spin_unlock_irqrestore(&ctx->completion_lock, flags);

	/* The old folio is no longer accessible. */
@@ -516,28 +517,30 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
			/ sizeof(struct io_event);

	ctx->ring_pages = ctx->internal_pages;
	ctx->ring_folios = ctx->internal_folios;
	if (nr_pages > AIO_RING_PAGES) {
		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
		ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *),
					   GFP_KERNEL);
		if (!ctx->ring_pages) {
		if (!ctx->ring_folios) {
			put_aio_ring_file(ctx);
			return -ENOMEM;
		}
	}

	for (i = 0; i < nr_pages; i++) {
		struct page *page;
		page = find_or_create_page(file->f_mapping,
					   i, GFP_USER | __GFP_ZERO);
		if (!page)
		struct folio *folio;

		folio = __filemap_get_folio(file->f_mapping, i,
					    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
					    GFP_USER | __GFP_ZERO);
		if (IS_ERR(folio))
			break;
		pr_debug("pid(%d) page[%d]->count=%d\n",
			 current->pid, i, page_count(page));
		SetPageUptodate(page);
		unlock_page(page);

		ctx->ring_pages[i] = page;
		pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
			 folio_ref_count(folio));
		folio_end_read(folio, true);

		ctx->ring_folios[i] = folio;
	}
	ctx->nr_pages = i;

@@ -570,7 +573,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
	ctx->user_id = ctx->mmap_base;
	ctx->nr_events = nr_events; /* trusted copy */

	ring = page_address(ctx->ring_pages[0]);
	ring = folio_address(ctx->ring_folios[0]);
	ring->nr = nr_events;	/* user copy */
	ring->id = ~0U;
	ring->head = ring->tail = 0;
@@ -578,7 +581,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
	ring->compat_features = AIO_RING_COMPAT_FEATURES;
	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
	ring->header_length = sizeof(struct aio_ring);
	flush_dcache_page(ctx->ring_pages[0]);
	flush_dcache_folio(ctx->ring_folios[0]);

	return 0;
}
@@ -689,9 +692,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)

					/* While kioctx setup is in progress,
					 * we are protected from page migration
					 * changes ring_pages by ->ring_lock.
					 * changes ring_folios by ->ring_lock.
					 */
					ring = page_address(ctx->ring_pages[0]);
					ring = folio_address(ctx->ring_folios[0]);
					ring->id = ctx->id;
					return 0;
				}
@@ -1033,7 +1036,7 @@ static void user_refill_reqs_available(struct kioctx *ctx)
		 * against ctx->completed_events below will make sure we do the
		 * safe/right thing.
		 */
		ring = page_address(ctx->ring_pages[0]);
		ring = folio_address(ctx->ring_folios[0]);
		head = ring->head;

		refill_reqs_available(ctx, head, ctx->tail);
@@ -1145,12 +1148,12 @@ static void aio_complete(struct aio_kiocb *iocb)
	if (++tail >= ctx->nr_events)
		tail = 0;

	ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
	ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
	event = ev_page + pos % AIO_EVENTS_PER_PAGE;

	*event = iocb->ki_res;

	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
	flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);

	pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
		 (void __user *)(unsigned long)iocb->ki_res.obj,
@@ -1163,10 +1166,10 @@ static void aio_complete(struct aio_kiocb *iocb)

	ctx->tail = tail;

	ring = page_address(ctx->ring_pages[0]);
	ring = folio_address(ctx->ring_folios[0]);
	head = ring->head;
	ring->tail = tail;
	flush_dcache_page(ctx->ring_pages[0]);
	flush_dcache_folio(ctx->ring_folios[0]);

	ctx->completed_events++;
	if (ctx->completed_events > 1)
@@ -1238,8 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
	sched_annotate_sleep();
	mutex_lock(&ctx->ring_lock);

	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
	ring = page_address(ctx->ring_pages[0]);
	/* Access to ->ring_folios here is protected by ctx->ring_lock. */
	ring = folio_address(ctx->ring_folios[0]);
	head = ring->head;
	tail = ring->tail;

@@ -1260,20 +1263,20 @@ static long aio_read_events_ring(struct kioctx *ctx,
	while (ret < nr) {
		long avail;
		struct io_event *ev;
		struct page *page;
		struct folio *folio;

		avail = (head <= tail ?  tail : ctx->nr_events) - head;
		if (head == tail)
			break;

		pos = head + AIO_EVENTS_OFFSET;
		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
		folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE];
		pos %= AIO_EVENTS_PER_PAGE;

		avail = min(avail, nr - ret);
		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);

		ev = page_address(page);
		ev = folio_address(folio);
		copy_ret = copy_to_user(event + ret, ev + pos,
					sizeof(*ev) * avail);

@@ -1287,9 +1290,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
		head %= ctx->nr_events;
	}

	ring = page_address(ctx->ring_pages[0]);
	ring = folio_address(ctx->ring_folios[0]);
	ring->head = head;
	flush_dcache_page(ctx->ring_pages[0]);
	flush_dcache_folio(ctx->ring_folios[0]);

	pr_debug("%li  h%u t%u\n", ret, head, tail);
out:
Loading