Commit df9c65b5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfs-6.7.iov_iter' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull iov_iter updates from Christian Brauner:
 "This contains David's iov_iter cleanup work to convert the iov_iter
  iteration macros to inline functions:

   - Remove last_offset from iov_iter as it was only used by ITER_PIPE

   - Add a __user tag on copy_mc_to_user()'s dst argument on x86 to
     match that on powerpc and get rid of a sparse warning

   - Convert iter->user_backed to user_backed_iter() in the sound PCM
     driver

   - Convert iter->user_backed to user_backed_iter() in a couple of
     infiniband drivers

   - Renumber the type enum so that the ITER_* constants match the order
     in iterate_and_advance*()

   - Since the preceding patch puts UBUF and IOVEC at 0 and 1, change
     user_backed_iter() to just use the type value and get rid of the
     extra flag

   - Convert the iov_iter iteration macros to always-inline functions to
     make the code easier to follow. It uses function pointers, but they
     get optimised away

   - Move the check for ->copy_mc to _copy_from_iter() and
     copy_page_from_iter_atomic() rather than in memcpy_from_iter_mc()
     where it gets repeated for every segment. Instead, we check once
     and invoke a side function that can use iterate_bvec() rather than
     iterate_and_advance() and supply a different step function

   - Move the copy-and-csum code to net/ where it can be in proximity
     with the code that uses it

   - Fold csum_and_memcpy() into its two users

   - Move csum_and_copy_from_iter_full() out of line and merge in
     csum_and_copy_from_iter() since the former is the only caller of
     the latter

   - Move hash_and_copy_to_iter() to net/ where it can be with its only
     caller"

* tag 'vfs-6.7.iov_iter' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  iov_iter, net: Move hash_and_copy_to_iter() to net/
  iov_iter, net: Merge csum_and_copy_from_iter{,_full}() together
  iov_iter, net: Fold in csum_and_memcpy()
  iov_iter, net: Move csum_and_copy_to/from_iter() to net/
  iov_iter: Don't deal with iter->copy_mc in memcpy_from_iter_mc()
  iov_iter: Convert iterate*() to inline funcs
  iov_iter: Derive user-backedness from the iterator type
  iov_iter: Renumber ITER_* constants
  infiniband: Use user_backed_iter() to see if iterator is UBUF/IOVEC
  sound: Fix snd_pcm_readv()/writev() to use iov access functions
  iov_iter, x86: Be consistent about the __user tag on copy_mc_to_user()
  iov_iter: Remove last_offset from iov_iter as it was for ITER_PIPE
parents 3b3f874c b5f0e20f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -496,7 +496,7 @@ copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel

unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
copy_mc_to_user(void __user *to, const void *from, unsigned len);
#endif

/*
+4 −4
Original line number Diff line number Diff line
@@ -70,23 +70,23 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);

unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len)
{
	unsigned long ret;

	if (copy_mc_fragile_enabled) {
		__uaccess_begin();
		ret = copy_mc_fragile(dst, src, len);
		ret = copy_mc_fragile((__force void *)dst, src, len);
		__uaccess_end();
		return ret;
	}

	if (static_cpu_has(X86_FEATURE_ERMS)) {
		__uaccess_begin();
		ret = copy_mc_enhanced_fast_string(dst, src, len);
		ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
		__uaccess_end();
		return ret;
	}

	return copy_user_generic(dst, src, len);
	return copy_user_generic((__force void *)dst, src, len);
}
+1 −1
Original line number Diff line number Diff line
@@ -267,7 +267,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)

	if (!HFI1_CAP_IS_KSET(SDMA))
		return -EINVAL;
	if (!from->user_backed)
	if (!user_backed_iter(from))
		return -EINVAL;
	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+1 −1
Original line number Diff line number Diff line
@@ -2244,7 +2244,7 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
	struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
	struct qib_user_sdma_queue *pq = fp->pq;

	if (!from->user_backed || !from->nr_segs || !pq)
	if (!user_backed_iter(from) || !from->nr_segs || !pq)
		return -EINVAL;

	return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs);
+274 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* I/O iterator iteration building functions.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _LINUX_IOV_ITER_H
#define _LINUX_IOV_ITER_H

#include <linux/uio.h>
#include <linux/bvec.h>

/*
 * Step function applied to one segment that is addressed through a kernel
 * pointer.  It is given the segment address (@iter_base), the amount already
 * iterated over before this segment (@progress), the segment length (@len)
 * and the two caller-private pointers.  It returns the amount of the segment
 * that it did NOT process, so 0 means the whole segment was handled.
 */
typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
			     void *priv, void *priv2);
/*
 * As iov_step_f, except that the segment address is a __user pointer.  This
 * is the variant handed to the ITER_UBUF and ITER_IOVEC walkers.
 */
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
			      void *priv, void *priv2);

/*
 * Handle ITER_UBUF.
 *
 * The iterator is treated as a single user-space region, so @step is invoked
 * exactly once, on iter->ubuf + iter->iov_offset, with a progress value of 0.
 * The amount @step reports as unprocessed is taken off @len; the iterator's
 * offset and remaining count are moved on by the result, which is returned.
 */
static __always_inline
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_ustep_f step)
{
	void __user *addr = iter->ubuf + iter->iov_offset;
	size_t left, done;

	left = step(addr, 0, len, priv, priv2);
	done = len - left;

	iter->iov_offset += done;
	iter->count -= done;
	return done;
}

/*
 * Handle ITER_IOVEC.
 *
 * Walk the array of user-space iovecs starting at iter->__iov, resuming
 * iter->iov_offset into the first one.  @step is called once per non-empty
 * piece with the user address, the amount handled so far and the piece
 * length.  The walk ends when @len has been used up or when @step leaves the
 * current iovec only partly consumed.  On return the iterator's segment
 * pointer, segment count, offset and remaining count reflect how far the walk
 * got, and the total amount processed is returned.
 */
static __always_inline
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		     iov_ustep_f step)
{
	const struct iovec *seg = iter->__iov;
	size_t done = 0, off = iter->iov_offset;

	for (;;) {
		size_t chunk = min(len, seg->iov_len - off);

		if (likely(chunk)) {
			size_t left = step(seg->iov_base + off, done, chunk,
					   priv, priv2);
			size_t used = chunk - left;

			done += used;
			off += used;
			len -= used;
			/* Segment not exhausted: stay on it and stop here. */
			if (off < seg->iov_len)
				break;
		}

		/* Segment fully used (or empty): move to the start of the next. */
		seg++;
		off = 0;
		if (!len)
			break;
	}

	iter->nr_segs -= seg - iter->__iov;
	iter->__iov = seg;
	iter->iov_offset = off;
	iter->count -= done;
	return done;
}

/*
 * Handle ITER_KVEC.
 *
 * Walk the array of kernel-address kvecs starting at iter->kvec, resuming
 * iter->iov_offset into the first one.  @step is called once per non-empty
 * piece with the kernel address, the amount handled so far and the piece
 * length.  The walk ends when @len has been used up or when @step leaves the
 * current kvec only partly consumed.  On return the iterator's segment
 * pointer, segment count, offset and remaining count reflect how far the walk
 * got, and the total amount processed is returned.
 */
static __always_inline
size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct kvec *seg = iter->kvec;
	size_t done = 0, off = iter->iov_offset;

	for (;;) {
		size_t chunk = min(len, seg->iov_len - off);

		if (likely(chunk)) {
			size_t left = step(seg->iov_base + off, done, chunk,
					   priv, priv2);
			size_t used = chunk - left;

			done += used;
			off += used;
			len -= used;
			/* Segment not exhausted: stay on it and stop here. */
			if (off < seg->iov_len)
				break;
		}

		/* Segment fully used (or empty): move to the start of the next. */
		seg++;
		off = 0;
		if (!len)
			break;
	}

	iter->nr_segs -= seg - iter->kvec;
	iter->kvec = seg;
	iter->iov_offset = off;
	iter->count -= done;
	return done;
}

/*
 * Handle ITER_BVEC.
 *
 * Walk the bio_vec array starting at iter->bvec, resuming iter->iov_offset
 * into the first element.  Each piece handed to @step is limited to what is
 * left of @len, what is left of the current bio_vec and what is left of the
 * current page; the page is mapped with kmap_local_page() for the duration of
 * the @step call only.  The walk ends when @len has been used up or as soon
 * as @step reports that it left part of a piece unprocessed.  On return the
 * iterator's segment pointer, segment count, offset and remaining count
 * reflect how far the walk got, and the total amount processed is returned.
 */
static __always_inline
size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		    iov_step_f step)
{
	const struct bio_vec *bv = iter->bvec;
	size_t done = 0, off = iter->iov_offset;

	for (;;) {
		size_t pos = bv->bv_offset + off;
		size_t in_page = pos % PAGE_SIZE;
		size_t chunk, left, used;
		void *kaddr;

		chunk = min3(len,
			     (size_t)(bv->bv_len - off),
			     (size_t)(PAGE_SIZE - in_page));

		kaddr = kmap_local_page(bv->bv_page + pos / PAGE_SIZE);
		left = step(kaddr + in_page, done, chunk, priv, priv2);
		kunmap_local(kaddr);

		used = chunk - left;
		done += used;
		len -= used;
		off += used;

		/* Finished this bio_vec: continue from the start of the next. */
		if (off >= bv->bv_len) {
			bv++;
			off = 0;
		}
		if (left || !len)
			break;
	}

	iter->nr_segs -= bv - iter->bvec;
	iter->bvec = bv;
	iter->iov_offset = off;
	iter->count -= done;
	return done;
}

/*
 * Handle ITER_XARRAY.
 *
 * Walk the folios stored in iter->xarray, beginning at the page index that
 * corresponds to iter->xarray_start + iter->iov_offset.  The whole walk runs
 * inside an RCU read-side section.  Each folio is presented to @step in
 * pieces that never cross a page boundary; every piece is mapped with
 * kmap_local_folio() just for the duration of the @step call.
 *
 * The walk stops when @len has been used up, when @step reports that it left
 * part of a piece unprocessed, when the xarray iteration runs out of entries,
 * or (with a warning) when a value entry or a hugetlb folio is encountered.
 *
 * Only iter->iov_offset and iter->count are advanced here; the amount
 * processed is returned.
 */
static __always_inline
size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		      iov_step_f step)
{
	struct folio *folio;
	size_t progress = 0;
	/* Absolute position at which this pass starts, and its page index. */
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	XA_STATE(xas, iter->xarray, index);

	rcu_read_lock();
	xas_for_each(&xas, folio, ULONG_MAX) {
		size_t remain, consumed, offset, part, flen;

		/* Skip entries for which xas_retry() asks for a retry. */
		if (xas_retry(&xas, folio))
			continue;
		/* Value entries and hugetlb folios are not expected here. */
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		/* Where in this folio to resume, and how much of it to use. */
		offset = offset_in_folio(folio, start + progress);
		flen = min(folio_size(folio) - offset, len);

		while (flen) {
			void *base = kmap_local_folio(folio, offset);

			/* Never let a single piece run past the end of a page. */
			part = min_t(size_t, flen,
				     PAGE_SIZE - offset_in_page(offset));
			remain = step(base, progress, part, priv, priv2);
			kunmap_local(base);

			consumed = part - remain;
			progress += consumed;
			len -= consumed;

			/* Short step or request satisfied: leave both loops. */
			if (remain || len == 0)
				goto out;
			flen -= consumed;
			offset += consumed;
		}
	}

out:
	rcu_read_unlock();
	iter->iov_offset += progress;
	iter->count -= progress;
	return progress;
}

/*
 * Handle ITER_DISCARD.
 *
 * No step function is called and @priv, @priv2 and @step are ignored: the
 * whole of @len is simply counted as processed, taken off iter->count and
 * returned.
 */
static __always_inline
size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
		      iov_step_f step)
{
	iter->count -= len;
	return len;
}

/**
 * iterate_and_advance2 - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @priv2: More data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * Iterate over the next part of an iterator, up to the specified length.  The
 * length is first clamped to the amount left in the iterator.  The buffer is
 * then presented in segments, which for kernel iteration are broken up by
 * physical pages and mapped, with the mapped address being presented.
 *
 * Two step functions must be supplied: @step handles mapped kernel addresses
 * and @ustep handles user addresses, which may fault since no pinning is
 * performed.
 *
 * Each step function is passed the address and length of the segment, @priv,
 * @priv2 and the amount of data iterated over so far (which can, for example,
 * be added to @priv to point to the right part of a second buffer).  A step
 * function should return the amount of the segment it did not process (i.e. 0
 * indicates complete processing).
 *
 * Return: the amount of data processed (i.e. 0 means nothing was processed
 * and the value of @len means the request was processed to completion).
 */
static __always_inline
size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
			    void *priv2, iov_ustep_f ustep, iov_step_f step)
{
	size_t done;

	/* Never go beyond what the iterator still holds. */
	if (unlikely(len > iter->count))
		len = iter->count;
	if (unlikely(len == 0))
		return 0;

	/* The user-backed types are tested first and hinted as likely. */
	if (likely(iter_is_ubuf(iter)))
		done = iterate_ubuf(iter, len, priv, priv2, ustep);
	else if (likely(iter_is_iovec(iter)))
		done = iterate_iovec(iter, len, priv, priv2, ustep);
	else if (iov_iter_is_bvec(iter))
		done = iterate_bvec(iter, len, priv, priv2, step);
	else if (iov_iter_is_kvec(iter))
		done = iterate_kvec(iter, len, priv, priv2, step);
	else if (iov_iter_is_xarray(iter))
		done = iterate_xarray(iter, len, priv, priv2, step);
	else
		done = iterate_discard(iter, len, priv, priv2, step);

	return done;
}

/**
 * iterate_and_advance - Iterate over an iterator
 * @iter: The iterator to iterate over.
 * @len: The amount to iterate over.
 * @priv: Data for the step functions.
 * @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
 * @step: Function for other iterators; given kernel addresses.
 *
 * As iterate_and_advance2(), but priv2 is always NULL.  Use this when the
 * step functions need only a single private pointer.
 *
 * Return: whatever iterate_and_advance2() returns for the same arguments.
 */
static __always_inline
size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
			   iov_ustep_f ustep, iov_step_f step)
{
	return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
}

#endif /* _LINUX_IOV_ITER_H */
Loading