Commit 4934ccbe authored by Jonathan Curley's avatar Jonathan Curley Committed by Anna Schumaker
Browse files

NFSv4/flexfiles: Read path updates for striped layouts



Update the read path to calculate the dss_id for each request and use it
to direct I/O to the appropriate stripe DS.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
parent a1491919
Loading
Loading
Loading
Loading
+98 −24
Original line number Diff line number Diff line
@@ -770,6 +770,7 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
			     u32 start_idx, u32 *best_idx,
			     u32 offset, u32 *dss_id,
			     bool check_device)
{
	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
@@ -780,12 +781,16 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
	/* mirrors are initially sorted by efficiency */
	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
		mirror = FF_LAYOUT_COMP(lseg, idx);
		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false);
		*dss_id = nfs4_ff_layout_calc_dss_id(
			fls->stripe_unit,
			fls->mirror_array[idx]->dss_count,
			offset);
		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false);
		if (IS_ERR(ds))
			continue;

		if (check_device &&
		    nfs4_test_deviceid_unavailable(&mirror->dss[0].mirror_ds->id_node)) {
		    nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) {
			// reinitialize the error state in case if this is the last iteration
			ds = ERR_PTR(-EINVAL);
			continue;
@@ -800,42 +805,52 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,

static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
				 u32 start_idx, u32 *best_idx)
				 u32 start_idx, u32 *best_idx,
				 u32 offset, u32 *dss_id)
{
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
					    offset, dss_id, false);
}

static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
				   u32 start_idx, u32 *best_idx)
				   u32 start_idx, u32 *best_idx,
				   u32 offset, u32 *dss_id)
{
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
					    offset, dss_id, true);
}

static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
				  u32 start_idx, u32 *best_idx)
				  u32 start_idx, u32 *best_idx,
				  u32 offset, u32 *dss_id)
{
	struct nfs4_pnfs_ds *ds;

	ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
	ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx,
						offset, dss_id);
	if (!IS_ERR(ds))
		return ds;
	return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
	return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx,
						offset, dss_id);
}

static struct nfs4_pnfs_ds *
ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
			  u32 *best_idx)
			  u32 *best_idx,
			  u32 offset,
			  u32 *dss_id)
{
	struct pnfs_layout_segment *lseg = pgio->pg_lseg;
	struct nfs4_pnfs_ds *ds;

	ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
					       best_idx);
					       best_idx, offset, dss_id);
	if (!IS_ERR(ds) || !pgio->pg_mirror_idx)
		return ds;
	return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
	return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx,
						 offset, dss_id);
}

static void
@@ -854,6 +869,56 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
	}
}

static bool
ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls)
{
	return fls->mirror_array[0]->dss_count > 1;
}

/*
 * ff_layout_pg_test(). Called by nfs_can_coalesce_requests()
 *
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		  struct nfs_page *req)
{
	unsigned int size;
	u64 p_stripe, r_stripe;
	u32 stripe_offset;
	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
	u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;

	/* calls nfs_generic_pg_test */
	size = pnfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;
	else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg)))
		return size;

	/* see if req and prev are in the same stripe */
	if (prev) {
		p_stripe = (u64)req_offset(prev) - segment_offset;
		r_stripe = (u64)req_offset(req) - segment_offset;
		do_div(p_stripe, stripe_unit);
		do_div(r_stripe, stripe_unit);

		if (p_stripe != r_stripe)
			return 0;
	}

	/* calculate remaining bytes in the current stripe */
	div_u64_rem((u64)req_offset(req) - segment_offset,
			stripe_unit,
			&stripe_offset);
	WARN_ON_ONCE(stripe_offset > stripe_unit);
	if (stripe_offset >= stripe_unit)
		return 0;
	return min(stripe_unit - (unsigned int)stripe_offset, size);
}

static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
@@ -861,7 +926,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
	struct nfs_pgio_mirror *pgm;
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_pnfs_ds *ds;
	u32 ds_idx;
	u32 ds_idx, dss_id;

	if (NFS_SERVER(pgio->pg_inode)->flags &
			(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
@@ -882,7 +947,8 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
	/* Reset wb_nio, since getting layout segment was successful */
	req->wb_nio = 0;

	ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
	ds = ff_layout_get_ds_for_read(pgio, &ds_idx,
				       req_offset(req), &dss_id);
	if (IS_ERR(ds)) {
		if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
			goto out_mds;
@@ -894,7 +960,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,

	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
	pgm = &pgio->pg_mirrors[0];
	pgm->pg_bsize = mirror->dss[0].mirror_ds->ds_versions[0].rsize;
	pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize;

	pgio->pg_mirror_idx = ds_idx;
	return;
@@ -1032,7 +1098,7 @@ ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)

static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
	.pg_init = ff_layout_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_test = ff_layout_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};
@@ -1087,9 +1153,11 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
	u32 idx = hdr->pgio_mirror_idx + 1;
	u32 new_idx = 0;
	u32 dss_id = 0;
	struct nfs4_pnfs_ds *ds;

	ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx);
	ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx,
					      hdr->args.offset, &dss_id);
	if (IS_ERR(ds))
		pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
	else
@@ -1884,6 +1952,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
	u32 idx = hdr->pgio_mirror_idx;
	int vers;
	struct nfs_fh *fh;
	u32 dss_id;
	bool ds_fatal_error = false;

	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
@@ -1891,22 +1960,26 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	mirror = FF_LAYOUT_COMP(lseg, idx);
	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false);
	dss_id = nfs4_ff_layout_calc_dss_id(
		FF_LAYOUT_LSEG(lseg)->stripe_unit,
		mirror->dss_count,
		offset);
	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false);
	if (IS_ERR(ds)) {
		ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
		goto out_failed;
	}

	ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
						   hdr->inode, 0);
						   hdr->inode, dss_id);
	if (IS_ERR(ds_clnt))
		goto out_failed;

	ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, 0);
	ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id);
	if (!ds_cred)
		goto out_failed;

	vers = nfs4_ff_layout_ds_version(mirror, 0);
	vers = nfs4_ff_layout_ds_version(mirror, dss_id);

	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
@@ -1914,11 +1987,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
	hdr->pgio_done_cb = ff_layout_read_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	fh = nfs4_ff_layout_select_ds_fh(mirror, 0);
	fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id);
	if (fh)
		hdr->args.fh = fh;

	nfs4_ff_layout_select_ds_stateid(mirror, 0, &hdr->args.stateid);
	nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid);

	/*
	 * Note that if we ever decide to split across DSes,
@@ -1928,7 +2001,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
	hdr->mds_offset = offset;

	/* Start IO accounting for local read */
	localio = ff_local_open_fh(lseg, idx, 0, ds->ds_clp, ds_cred, fh, FMODE_READ);
	localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
				FMODE_READ);
	if (localio) {
		hdr->task.tk_start = ktime_get();
		ff_layout_read_record_layoutstats_start(&hdr->task, hdr);