Commit 81538c8e authored by Linus Torvalds
Browse files
Pull nfsd updates from Chuck Lever:
 "Mike Snitzer has prototyped a mechanism for disabling I/O caching in
  NFSD. This is introduced in v6.18 as an experimental feature. This
  enables scaling NFSD in /both/ directions:

   - NFS service can be supported on systems with small memory
     footprints, such as low-cost cloud instances

   - Large NFS workloads will be less likely to force the eviction of
     server-local activity, helping it avoid thrashing

  Jeff Layton contributed a number of fixes to the new attribute
  delegation implementation (based on a pending Internet RFC) that we
  hope will make attribute delegation reliable enough to enable by
  default, as it is on the Linux NFS client.

  The remaining patches in this pull request are clean-ups and minor
  optimizations. Many thanks to the contributors, reviewers, testers,
  and bug reporters who participated during the v6.18 NFSD development
  cycle"

* tag 'nfsd-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (42 commits)
  nfsd: discard nfserr_dropit
  SUNRPC: Make RPCSEC_GSS_KRB5 select CRYPTO instead of depending on it
  NFSD: Add io_cache_{read,write} controls to debugfs
  NFSD: Do the grace period check in ->proc_layoutget
  nfsd: delete unnecessary NULL check in __fh_verify()
  NFSD: Allow layoutcommit during grace period
  NFSD: Disallow layoutget during grace period
  sunrpc: fix "occurence"->"occurrence"
  nfsd: Don't force CRYPTO_LIB_SHA256 to be built-in
  nfsd: nfserr_jukebox in nlm_fopen should lead to a retry
  NFSD: Reduce DRC bucket size
  NFSD: Delay adding new entries to LRU
  SUNRPC: Move the svc_rpcb_cleanup() call sites
  NFS: Remove rpcbind cleanup for NFSv4.0 callback
  nfsd: unregister with rpcbind when deleting a transport
  NFSD: Drop redundant conversion to bool
  sunrpc: eliminate return pointer in svc_tcp_sendmsg()
  sunrpc: fix pr_notice in svc_tcp_sendto() to show correct length
  nfsd: decouple the xprtsec policy check from check_nfsd_access()
  NFSD: Fix destination buffer size in nfsd4_ssc_setup_dul()
  ...
parents 256e3417 73cc6ec1
Loading
Loading
Loading
Loading
+19 −25
Original line number Diff line number Diff line
@@ -286,20 +286,12 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
	unsigned int ia_valid = attr->ia_valid;
	struct timespec64 now;

	if (ia_valid & ATTR_CTIME) {
		/*
		 * In the case of an update for a write delegation, we must respect
		 * the value in ia_ctime and not use the current time.
		 */
		if (ia_valid & ATTR_DELEG)
	if (ia_valid & ATTR_CTIME_SET)
		now = inode_set_ctime_deleg(inode, attr->ia_ctime);
		else
	else if (ia_valid & ATTR_CTIME)
		now = inode_set_ctime_current(inode);
	} else {
		/* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
		WARN_ON_ONCE(ia_valid & ATTR_MTIME);
	else
		now = current_time(inode);
	}

	if (ia_valid & ATTR_ATIME_SET)
		inode_set_atime_to_ts(inode, attr->ia_atime);
@@ -359,13 +351,12 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
		inode_set_atime_to_ts(inode, attr->ia_atime);
	if (ia_valid & ATTR_MTIME)
		inode_set_mtime_to_ts(inode, attr->ia_mtime);
	if (ia_valid & ATTR_CTIME) {
		if (ia_valid & ATTR_DELEG)

	if (ia_valid & ATTR_CTIME_SET)
		inode_set_ctime_deleg(inode, attr->ia_ctime);
		else
	else if (ia_valid & ATTR_CTIME)
		inode_set_ctime_to_ts(inode, attr->ia_ctime);
}
}
EXPORT_SYMBOL(setattr_copy);

int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
@@ -463,15 +454,18 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,

	now = current_time(inode);

	attr->ia_ctime = now;
	if (!(ia_valid & ATTR_ATIME_SET))
		attr->ia_atime = now;
	else
	if (ia_valid & ATTR_ATIME_SET)
		attr->ia_atime = timestamp_truncate(attr->ia_atime, inode);
	if (!(ia_valid & ATTR_MTIME_SET))
		attr->ia_mtime = now;
	else
		attr->ia_atime = now;
	if (ia_valid & ATTR_CTIME_SET)
		attr->ia_ctime = timestamp_truncate(attr->ia_ctime, inode);
	else
		attr->ia_ctime = now;
	if (ia_valid & ATTR_MTIME_SET)
		attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode);
	else
		attr->ia_mtime = now;

	if (ia_valid & ATTR_KILL_PRIV) {
		error = security_inode_need_killpriv(dentry);
+1 −1
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ config NFSD
	depends on FILE_LOCKING
	depends on FSNOTIFY
	select CRC32
	select CRYPTO_LIB_SHA256 if NFSD_V4
	select LOCKD
	select SUNRPC
	select EXPORTFS
@@ -77,7 +78,6 @@ config NFSD_V4
	select FS_POSIX_ACL
	select RPCSEC_GSS_KRB5
	select CRYPTO
	select CRYPTO_LIB_SHA256
	select CRYPTO_MD5
	select GRACE_PERIOD
	select NFS_V4_2_SSC_HELPER if NFS_V4_2
+19 −13
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@


static __be32
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
		struct nfsd4_layoutget *args)
nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
		const struct svc_fh *fhp, struct nfsd4_layoutget *args)
{
	struct nfsd4_layout_seg *seg = &args->lg_seg;
	struct super_block *sb = inode->i_sb;
@@ -29,6 +29,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
	u32 device_generation = 0;
	int error;

	if (locks_in_grace(SVC_NET(rqstp)))
		return nfserr_grace;

	if (seg->offset & (block_size - 1)) {
		dprintk("pnfsd: I/O misaligned\n");
		goto out_layoutunavailable;
@@ -118,7 +121,6 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
		struct iomap *iomaps, int nr_iomaps)
{
	struct timespec64 mtime = inode_get_mtime(inode);
	loff_t new_size = lcp->lc_last_wr + 1;
	struct iattr iattr = { .ia_valid = 0 };
	int error;

@@ -128,9 +130,9 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
	iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
	iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;

	if (new_size > i_size_read(inode)) {
	if (lcp->lc_size_chg) {
		iattr.ia_valid |= ATTR_SIZE;
		iattr.ia_size = new_size;
		iattr.ia_size = lcp->lc_newsize;
	}

	error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
@@ -173,16 +175,18 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
}

static __be32
nfsd4_block_proc_layoutcommit(struct inode *inode,
nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
		struct nfsd4_layoutcommit *lcp)
{
	struct iomap *iomaps;
	int nr_iomaps;
	__be32 nfserr;

	nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
			lcp->lc_up_len, &iomaps, &nr_iomaps,
			i_blocksize(inode));
	rqstp->rq_arg = lcp->lc_up_layout;
	svcxdr_init_decode(rqstp);

	nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
			&iomaps, &nr_iomaps, i_blocksize(inode));
	if (nfserr != nfs_ok)
		return nfserr;

@@ -313,16 +317,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
	return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
}
static __be32
nfsd4_scsi_proc_layoutcommit(struct inode *inode,
nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
		struct nfsd4_layoutcommit *lcp)
{
	struct iomap *iomaps;
	int nr_iomaps;
	__be32 nfserr;

	nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
			lcp->lc_up_len, &iomaps, &nr_iomaps,
			i_blocksize(inode));
	rqstp->rq_arg = lcp->lc_up_layout;
	svcxdr_init_decode(rqstp);

	nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
			&iomaps, &nr_iomaps, i_blocksize(inode));
	if (nfserr != nfs_ok)
		return nfserr;

+56 −30
Original line number Diff line number Diff line
@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
	*p++ = cpu_to_be32(len);
	*p++ = cpu_to_be32(1);		/* we always return a single extent */

	p = xdr_encode_opaque_fixed(p, &b->vol_id,
			sizeof(struct nfsd4_deviceid));
	p = svcxdr_encode_deviceid4(p, &b->vol_id);
	p = xdr_encode_hyper(p, b->foff);
	p = xdr_encode_hyper(p, b->len);
	p = xdr_encode_hyper(p, b->soff);
@@ -114,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,

/**
 * nfsd4_block_decode_layoutupdate - decode the block layout extent array
 * @p: pointer to the xdr data
 * @len: number of bytes to decode
 * @xdr: subbuf set to the encoded array
 * @iomapp: pointer to store the decoded extent array
 * @nr_iomapsp: pointer to store the number of extents
 * @block_size: alignment of extent offset and length
@@ -128,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
 *
 * Return values:
 *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
 *   %nfserr_bad_xdr: The encoded array in @p is invalid
 *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
 *   %nfserr_inval: An unaligned extent found
 *   %nfserr_delay: Failed to allocate memory for @iomapp
 */
__be32
nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
		int *nr_iomapsp, u32 block_size)
{
	struct iomap *iomaps;
	u32 nr_iomaps, i;
	u32 nr_iomaps, expected, len, i;
	__be32 nfserr;

	if (len < sizeof(u32))
		return nfserr_bad_xdr;
	len -= sizeof(u32);
	if (len % PNFS_BLOCK_EXTENT_SIZE)
	if (xdr_stream_decode_u32(xdr, &nr_iomaps))
		return nfserr_bad_xdr;

	nr_iomaps = be32_to_cpup(p++);
	if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
	len = sizeof(__be32) + xdr_stream_remaining(xdr);
	expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
	if (len != expected)
		return nfserr_bad_xdr;

	iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
@@ -156,23 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	for (i = 0; i < nr_iomaps; i++) {
		struct pnfs_block_extent bex;

		memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
		p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
		if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}

		p = xdr_decode_hyper(p, &bex.foff);
		if (xdr_stream_decode_u64(xdr, &bex.foff)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (bex.foff & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u64(xdr, &bex.len)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		p = xdr_decode_hyper(p, &bex.len);
		if (bex.len & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u64(xdr, &bex.soff)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		p = xdr_decode_hyper(p, &bex.soff);
		if (bex.soff & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u32(xdr, &bex.es)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		bex.es = be32_to_cpup(p++);
		if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
			nfserr = nfserr_inval;
			goto fail;
		}

@@ -185,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	return nfs_ok;
fail:
	kfree(iomaps);
	return nfserr_inval;
	return nfserr;
}

/**
 * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
 * @p: pointer to the xdr data
 * @len: number of bytes to decode
 * @xdr: subbuf set to the encoded array
 * @iomapp: pointer to store the decoded extent array
 * @nr_iomapsp: pointer to store the number of extents
 * @block_size: alignment of extent offset and length
@@ -203,21 +220,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 *
 * Return values:
 *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
 *   %nfserr_bad_xdr: The encoded array in @p is invalid
 *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
 *   %nfserr_inval: An unaligned extent found
 *   %nfserr_delay: Failed to allocate memory for @iomapp
 */
__be32
nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
		int *nr_iomapsp, u32 block_size)
{
	struct iomap *iomaps;
	u32 nr_iomaps, expected, i;
	u32 nr_iomaps, expected, len, i;
	__be32 nfserr;

	if (len < sizeof(u32))
	if (xdr_stream_decode_u32(xdr, &nr_iomaps))
		return nfserr_bad_xdr;

	nr_iomaps = be32_to_cpup(p++);
	len = sizeof(__be32) + xdr_stream_remaining(xdr);
	expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
	if (len != expected)
		return nfserr_bad_xdr;
@@ -229,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	for (i = 0; i < nr_iomaps; i++) {
		u64 val;

		p = xdr_decode_hyper(p, &val);
		if (xdr_stream_decode_u64(xdr, &val)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (val & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}
		iomaps[i].offset = val;

		p = xdr_decode_hyper(p, &val);
		if (xdr_stream_decode_u64(xdr, &val)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (val & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}
		iomaps[i].length = val;
@@ -247,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	return nfs_ok;
fail:
	kfree(iomaps);
	return nfserr_inval;
	return nfserr;
}
+2 −2
Original line number Diff line number Diff line
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
		const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
		const struct nfsd4_layoutget *lgp);
__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
		struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
		struct iomap **iomapp, int *nr_iomapsp, u32 block_size);

#endif /* _NFSD_BLOCKLAYOUTXDR_H */
Loading