Commit 4f40c636 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "New Features:
   - Add support for large folios
   - Implement rpcrdma generic device removal notification
   - Add client support for attribute delegations
   - Use a LAYOUTRETURN during reboot recovery to report layoutstats
     and errors
   - Improve throughput for random buffered writes
   - Add NVMe support to pnfs/blocklayout

  Bugfixes:
   - Fix rpcrdma_reqs_reset()
   - Avoid soft lockups when using UDP
   - Fix an nfs/blocklayout premature PR key unregistration
   - Another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
   - Do not extend writes to the entire folio
   - Pass explicit offset and count values to tracepoints
   - Fix a race to wake up sleeping SUNRPC sync tasks
   - Fix gss_status tracepoint output

  Cleanups:
   - Add missing MODULE_DESCRIPTION() macros
   - Add blocklayout / SCSI layout tracepoints
   - Remove asm-generic headers from xprtrdma verbs.c
   - Remove unused 'struct mnt_fhstatus'
   - Other delegation related cleanups
   - Other folio related cleanups
   - Other pNFS related cleanups
   - Other xprtrdma cleanups"

* tag 'nfs-for-6.11-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (63 commits)
  SUNRPC: Fixup gss_status tracepoint error output
  SUNRPC: Fix a race to wake a sync task
  nfs: split nfs_read_folio
  nfs: pass explicit offset/count to trace events
  nfs: do not extend writes to the entire folio
  nfs/blocklayout: add support for NVMe
  nfs: remove nfs_page_length
  nfs: remove the unused max_deviceinfo_size field from struct pnfs_layoutdriver_type
  nfs: don't reuse partially completed requests in nfs_lock_and_join_requests
  nfs: move nfs_wait_on_request to write.c
  nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests
  nfs: fold nfs_folio_find_and_lock_request into nfs_lock_and_join_requests
  nfs: simplify nfs_folio_find_and_lock_request
  nfs: remove nfs_folio_private_request
  nfs: remove dead code for the old swap over NFS implementation
  NFSv4.1 another fix for EXCHGID4_FLAG_USE_PNFS_DS for DS server
  nfs: Block on write congestion
  nfs: Properly initialize server->writeback
  nfs: Drop pointless check from nfs_commit_release_pages()
  nfs/blocklayout: SCSI layout trace points for reservation key reg/unreg
  ...
parents 51ed42a8 b9fae9f0
Loading
Loading
Loading
Loading
+16 −9
Original line number Diff line number Diff line
@@ -564,25 +564,32 @@ bl_find_get_deviceid(struct nfs_server *server,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node;
	unsigned long start, end;
	int err = -ENODEV;

retry:
	node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
	if (!node)
		return ERR_PTR(-ENODEV);

	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
		return node;
	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
		unsigned long end = jiffies;
		unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;

	end = jiffies;
	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
		if (!time_in_range(node->timestamp_unavailable, start, end)) {
			nfs4_delete_deviceid(node->ld, node->nfs_client, id);
			goto retry;
		}
		goto out_put;
	}

	if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
		goto out_put;

	return node;

out_put:
	nfs4_put_deviceid_node(node);
	return ERR_PTR(-ENODEV);
	return ERR_PTR(err);
}

static int
+8 −1
Original line number Diff line number Diff line
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
	u64				start;
	u64				len;

	enum pnfs_block_volume_type	type;
	u32				nr_children;
	struct pnfs_block_dev		*children;
	u64				chunk_size;

	struct file			*bdev_file;
	u64				disk_offset;
	unsigned long			flags;

	u64				pr_key;
	bool				pr_registered;

	bool (*map)(struct pnfs_block_dev *dev, u64 offset,
			struct pnfs_block_dev_map *map);
};

/*
 * pnfs_block_dev flag bits: bit numbers within pnfs_block_dev.flags,
 * manipulated with test_and_set_bit()/test_and_clear_bit().
 */
enum {
	/* Set by bl_register_scsi(), cleared by bl_unregister_scsi(). */
	PNFS_BDEV_REGISTERED = 0,
};

/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
	union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */

/* dev.c */
bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
		struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
+87 −29
Original line number Diff line number Diff line
@@ -10,12 +10,83 @@
#include <linux/pr.h>

#include "blocklayout.h"
#include "../nfs4trace.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

/*
 * Remove the persistent reservation key of a SCSI volume, if one is
 * currently recorded as registered.
 *
 * @dev: leaf block device; dev->pr_key is the key to remove.
 *
 * Does nothing when PNFS_BDEV_REGISTERED is not set.  The outcome of the
 * pr_register() call is only reported through tracepoints.
 */
static void bl_unregister_scsi(struct pnfs_block_dev *dev)
{
	struct block_device *bdev;
	const struct pr_ops *ops;
	int status;

	/*
	 * Test the flag before touching dev->bdev_file: this function is
	 * reached from bl_free_device() before its own NULL check of
	 * bdev_file, so a device whose open failed must not be dereferenced
	 * here.  The flag can only be set once bdev_file was valid.
	 */
	if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
		return;

	bdev = file_bdev(dev->bdev_file);
	ops = bdev->bd_disk->fops->pr_ops;

	/* old key = pr_key, new key = 0: drops the registration */
	status = ops->pr_register(bdev, dev->pr_key, 0, false);
	if (status)
		trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
	else
		trace_bl_pr_key_unreg(bdev, dev->pr_key);
}

/*
 * Register the persistent reservation key of a SCSI volume.
 *
 * @dev: leaf block device; dev->pr_key is the key to install.
 *
 * Returns true when the key is registered (either by this call or because
 * PNFS_BDEV_REGISTERED was already set), false when pr_register() failed.
 */
static bool bl_register_scsi(struct pnfs_block_dev *dev)
{
	struct block_device *bdev = file_bdev(dev->bdev_file);
	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
	int status;

	if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
		return true;

	/* old key = 0, new key = pr_key: installs the registration */
	status = ops->pr_register(bdev, 0, dev->pr_key, true);
	if (status) {
		/*
		 * Undo the flag set above.  Leaving it set would make every
		 * later call report success without a key on the device, and
		 * would make teardown try to remove a key that was never
		 * installed.
		 */
		clear_bit(PNFS_BDEV_REGISTERED, &dev->flags);
		trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
		return false;
	}
	trace_bl_pr_key_reg(bdev, dev->pr_key);
	return true;
}

/*
 * Walk a block device tree and unregister every SCSI leaf.
 *
 * A device with children is only recursed into; a device without children
 * is passed to bl_unregister_scsi() when its type is PNFS_BLOCK_VOLUME_SCSI.
 */
static void bl_unregister_dev(struct pnfs_block_dev *dev)
{
	u32 idx;

	/* Leaf device: only SCSI volumes have anything to undo. */
	if (!dev->nr_children) {
		if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
			bl_unregister_scsi(dev);
		return;
	}

	for (idx = 0; idx < dev->nr_children; idx++)
		bl_unregister_dev(&dev->children[idx]);
}

/*
 * Walk a block device tree and register every SCSI leaf.
 *
 * Returns true when all leaves below @dev registered successfully (devices
 * that are not PNFS_BLOCK_VOLUME_SCSI count as success).  When a child
 * fails, the children handled before it are unregistered again, last one
 * first, and false is returned.
 */
bool bl_register_dev(struct pnfs_block_dev *dev)
{
	u32 idx;

	/* Leaf device: only SCSI volumes need a registration. */
	if (!dev->nr_children) {
		if (dev->type != PNFS_BLOCK_VOLUME_SCSI)
			return true;
		return bl_register_scsi(dev);
	}

	for (idx = 0; idx < dev->nr_children; idx++) {
		if (bl_register_dev(&dev->children[idx]))
			continue;

		/* Roll back the children that did succeed, in reverse. */
		while (idx > 0)
			bl_unregister_dev(&dev->children[--idx]);
		return false;
	}

	return true;
}

static void
bl_free_device(struct pnfs_block_dev *dev)
{
	bl_unregister_dev(dev);

	if (dev->nr_children) {
		int i;

@@ -23,17 +94,6 @@ bl_free_device(struct pnfs_block_dev *dev)
			bl_free_device(&dev->children[i]);
		kfree(dev->children);
	} else {
		if (dev->pr_registered) {
			const struct pr_ops *ops =
				file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops;
			int error;

			error = ops->pr_register(file_bdev(dev->bdev_file),
				dev->pr_key, 0, false);
			if (error)
				pr_err("failed to unregister PR key.\n");
		}

		if (dev->bdev_file)
			fput(dev->bdev_file);
	}
@@ -314,7 +374,7 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
	bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
					NULL, NULL);
	if (IS_ERR(bdev_file)) {
		pr_warn("pNFS: failed to open device %s (%ld)\n",
		dprintk("failed to open device %s (%ld)\n",
			devname, PTR_ERR(bdev_file));
	}

@@ -327,8 +387,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	struct file *bdev_file;
	struct block_device *bdev;
	const struct pr_ops *ops;
	struct file *bdev_file;
	int error;

	if (!bl_validate_designator(v))
@@ -344,35 +405,30 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
	if (IS_ERR(bdev_file))
		bdev_file = bl_open_path(v, "wwn-0x");
	if (IS_ERR(bdev_file))
		bdev_file = bl_open_path(v, "nvme-eui.");
	if (IS_ERR(bdev_file)) {
		pr_warn("pNFS: no device found for volume %*phN\n",
			v->scsi.designator_len, v->scsi.designator);
		return PTR_ERR(bdev_file);
	}
	d->bdev_file = bdev_file;
	bdev = file_bdev(bdev_file);

	d->len = bdev_nr_bytes(file_bdev(d->bdev_file));
	d->len = bdev_nr_bytes(bdev);
	d->map = bl_map_simple;
	d->pr_key = v->scsi.pr_key;

	if (d->len == 0)
		return -ENODEV;

	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
		file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key);

	ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops;
	ops = bdev->bd_disk->fops->pr_ops;
	if (!ops) {
		pr_err("pNFS: block device %s does not support reservations.",
				file_bdev(d->bdev_file)->bd_disk->disk_name);
				bdev->bd_disk->disk_name);
		error = -EINVAL;
		goto out_blkdev_put;
	}

	error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true);
	if (error) {
		pr_err("pNFS: failed to register key for block device %s.",
				file_bdev(d->bdev_file)->bd_disk->disk_name);
		goto out_blkdev_put;
	}

	d->pr_registered = true;
	return 0;

out_blkdev_put:
@@ -458,7 +514,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	d->type = volumes[idx].type;

	switch (d->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
@@ -470,7 +528,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
	case PNFS_BLOCK_VOLUME_SCSI:
		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		dprintk("unsupported volume type: %d\n", d->type);
		return -EIO;
	}
}
+3 −2
Original line number Diff line number Diff line
@@ -46,14 +46,15 @@ struct cb_compound_hdr_res {

struct cb_getattrargs {
	struct nfs_fh fh;
	uint32_t bitmap[2];
	uint32_t bitmap[3];
};

struct cb_getattrres {
	__be32 status;
	uint32_t bitmap[2];
	uint32_t bitmap[3];
	uint64_t size;
	uint64_t change_attr;
	struct timespec64 atime;
	struct timespec64 ctime;
	struct timespec64 mtime;
};
+12 −7
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
	if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
		goto out;

	res->bitmap[0] = res->bitmap[1] = 0;
	memset(res->bitmap, 0, sizeof(res->bitmap));
	res->status = htonl(NFS4ERR_BADHANDLE);

	dprintk_rcu("NFS: GETATTR callback request from %s\n",
@@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
	res->change_attr = delegation->change_attr;
	if (nfs_have_writebacks(inode))
		res->change_attr++;
	res->atime = inode_get_atime(inode);
	res->ctime = inode_get_ctime(inode);
	res->mtime = inode_get_mtime(inode);
	res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) &
			 args->bitmap[0];
	res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
		args->bitmap[1];
	res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS |
			  FATTR4_WORD1_TIME_METADATA |
			  FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1];
	res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS |
			  FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2];
	res->status = 0;
out_iput:
	rcu_read_unlock();
@@ -319,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
	int stat;

	if (args->cbl_recall_type == RETURN_FSID)
		stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
		stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
						  PNFS_LAYOUT_BULK_RETURN);
	else
		stat = pnfs_destroy_layouts_byclid(clp, true);
		stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
	if (stat != 0)
		return NFS4ERR_DELAY;
	return NFS4ERR_NOMATCHING_LAYOUT;
Loading