Commit f963cf2b authored by Sergey Bashirov's avatar Sergey Bashirov Committed by Chuck Lever
Browse files

NFSD: Implement large extent array support in pNFS



When pNFS client in the block or scsi layout mode sends layoutcommit
to MDS, a variable length array of modified extents is supplied within
the request. This patch allows the server to accept such extent arrays
if they do not fit within single memory page.

The issue can be reproduced when writing to a 1GB file using FIO with
O_DIRECT, 4K block and large I/O depth without preallocation of the
file. In this case, the server returns NFSERR_BADXDR to the client.

Co-developed-by: default avatarKonstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: default avatarKonstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: default avatarSergey Bashirov <sergeybashirov@gmail.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
parent 6bf1be33
Loading
Loading
Loading
Loading
+12 −8
Original line number Diff line number Diff line
@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
}

static __be32
nfsd4_block_proc_layoutcommit(struct inode *inode,
nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
		struct nfsd4_layoutcommit *lcp)
{
	struct iomap *iomaps;
	int nr_iomaps;
	__be32 nfserr;

	nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
			lcp->lc_up_len, &iomaps, &nr_iomaps,
			i_blocksize(inode));
	rqstp->rq_arg = lcp->lc_up_layout;
	svcxdr_init_decode(rqstp);

	nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
			&iomaps, &nr_iomaps, i_blocksize(inode));
	if (nfserr != nfs_ok)
		return nfserr;

@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
	return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
}
static __be32
nfsd4_scsi_proc_layoutcommit(struct inode *inode,
nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
		struct nfsd4_layoutcommit *lcp)
{
	struct iomap *iomaps;
	int nr_iomaps;
	__be32 nfserr;

	nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
			lcp->lc_up_len, &iomaps, &nr_iomaps,
			i_blocksize(inode));
	rqstp->rq_arg = lcp->lc_up_layout;
	svcxdr_init_decode(rqstp);

	nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
			&iomaps, &nr_iomaps, i_blocksize(inode));
	if (nfserr != nfs_ok)
		return nfserr;

+56 −27
Original line number Diff line number Diff line
@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,

/**
 * nfsd4_block_decode_layoutupdate - decode the block layout extent array
 * @p: pointer to the xdr data
 * @len: number of bytes to decode
 * @xdr: subbuf set to the encoded array
 * @iomapp: pointer to store the decoded extent array
 * @nr_iomapsp: pointer to store the number of extents
 * @block_size: alignment of extent offset and length
@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
 *
 * Return values:
 *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
 *   %nfserr_bad_xdr: The encoded array in @p is invalid
 *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
 *   %nfserr_inval: An unaligned extent found
 *   %nfserr_delay: Failed to allocate memory for @iomapp
 */
__be32
nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
		int *nr_iomapsp, u32 block_size)
{
	struct iomap *iomaps;
	u32 nr_iomaps, i;
	u32 nr_iomaps, expected, len, i;
	__be32 nfserr;

	if (len < sizeof(u32))
		return nfserr_bad_xdr;
	len -= sizeof(u32);
	if (len % PNFS_BLOCK_EXTENT_SIZE)
	if (xdr_stream_decode_u32(xdr, &nr_iomaps))
		return nfserr_bad_xdr;

	nr_iomaps = be32_to_cpup(p++);
	if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
	len = sizeof(__be32) + xdr_stream_remaining(xdr);
	expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
	if (len != expected)
		return nfserr_bad_xdr;

	iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	for (i = 0; i < nr_iomaps; i++) {
		struct pnfs_block_extent bex;

		p = svcxdr_decode_deviceid4(p, &bex.vol_id);
		p = xdr_decode_hyper(p, &bex.foff);
		if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}

		if (xdr_stream_decode_u64(xdr, &bex.foff)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (bex.foff & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u64(xdr, &bex.len)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		p = xdr_decode_hyper(p, &bex.len);
		if (bex.len & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u64(xdr, &bex.soff)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		p = xdr_decode_hyper(p, &bex.soff);
		if (bex.soff & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}

		if (xdr_stream_decode_u32(xdr, &bex.es)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		bex.es = be32_to_cpup(p++);
		if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
			nfserr = nfserr_inval;
			goto fail;
		}

@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	return nfs_ok;
fail:
	kfree(iomaps);
	return nfserr_inval;
	return nfserr;
}

/**
 * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
 * @p: pointer to the xdr data
 * @len: number of bytes to decode
 * @xdr: subbuf set to the encoded array
 * @iomapp: pointer to store the decoded extent array
 * @nr_iomapsp: pointer to store the number of extents
 * @block_size: alignment of extent offset and length
@@ -200,21 +220,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 *
 * Return values:
 *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
 *   %nfserr_bad_xdr: The encoded array in @p is invalid
 *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
 *   %nfserr_inval: An unaligned extent found
 *   %nfserr_delay: Failed to allocate memory for @iomapp
 */
__be32
nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
		int *nr_iomapsp, u32 block_size)
{
	struct iomap *iomaps;
	u32 nr_iomaps, expected, i;
	u32 nr_iomaps, expected, len, i;
	__be32 nfserr;

	if (len < sizeof(u32))
	if (xdr_stream_decode_u32(xdr, &nr_iomaps))
		return nfserr_bad_xdr;

	nr_iomaps = be32_to_cpup(p++);
	len = sizeof(__be32) + xdr_stream_remaining(xdr);
	expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
	if (len != expected)
		return nfserr_bad_xdr;
@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	for (i = 0; i < nr_iomaps; i++) {
		u64 val;

		p = xdr_decode_hyper(p, &val);
		if (xdr_stream_decode_u64(xdr, &val)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (val & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}
		iomaps[i].offset = val;

		p = xdr_decode_hyper(p, &val);
		if (xdr_stream_decode_u64(xdr, &val)) {
			nfserr = nfserr_bad_xdr;
			goto fail;
		}
		if (val & (block_size - 1)) {
			nfserr = nfserr_inval;
			goto fail;
		}
		iomaps[i].length = val;
@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
	return nfs_ok;
fail:
	kfree(iomaps);
	return nfserr_inval;
	return nfserr;
}
+2 −2
Original line number Diff line number Diff line
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
		const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
		const struct nfsd4_layoutget *lgp);
__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
		struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
		struct iomap **iomapp, int *nr_iomapsp, u32 block_size);

#endif /* _NFSD_BLOCKLAYOUTXDR_H */
+1 −1
Original line number Diff line number Diff line
@@ -2520,7 +2520,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
		lcp->lc_size_chg = false;
	}

	nfserr = ops->proc_layoutcommit(inode, lcp);
	nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
	nfs4_put_stid(&ls->ls_stid);
out:
	return nfserr;
+5 −6
Original line number Diff line number Diff line
@@ -592,6 +592,8 @@ static __be32
nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
			   struct nfsd4_layoutcommit *lcp)
{
	u32 len;

	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
		return nfserr_bad_xdr;
	if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
@@ -599,13 +601,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
	if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
		return nfserr_bad_xdr;

	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
	if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
		return nfserr_bad_xdr;
	if (lcp->lc_up_len > 0) {
		lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
		if (!lcp->lc_up_layout)
	if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
		return nfserr_bad_xdr;
	}

	return nfs_ok;
}
Loading