Commit 20b1d75f authored by Jonathan Curley, committed by Anna Schumaker
Browse files

NFSv4/flexfiles: Add support for striped layouts



Update the lseg creation path to parse and add striped layouts, and enable
support for striped layouts.

Limitations:

1. All mirrors must have the same number of stripes.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
parent 8a8e0f55
Loading
Loading
Loading
Loading
+155 −92
Original line number Diff line number Diff line
@@ -177,18 +177,19 @@ ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
#endif
}

static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
		const struct nfs4_ff_layout_mirror *m2)
static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
		const struct nfs4_ff_layout_ds_stripe *dss2)
{
	int i, j;

	if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt)
	if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
		return false;
	for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) {

	for (i = 0; i < dss1->fh_versions_cnt; i++) {
		bool found_fh = false;
		for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) {
			if (nfs_compare_fh(&m1->dss[0].fh_versions[i],
					&m2->dss[0].fh_versions[j]) == 0) {
		for (j = 0; j < dss2->fh_versions_cnt; j++) {
			if (nfs_compare_fh(&dss1->fh_versions[i],
					&dss2->fh_versions[j]) == 0) {
				found_fh = true;
				break;
			}
@@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
	return true;
}

/*
 * Check whether two mirrors agree on their filehandles, stripe by stripe.
 *
 * The mirrors must hold the same number of stripes, and the stripe at each
 * index of @m1 must be accepted by ff_dss_match_fh() against the stripe at
 * the same index of @m2.
 *
 * Returns true when every stripe pair matches (including the case of zero
 * stripes on both sides), false otherwise.
 */
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
		const struct nfs4_ff_layout_mirror *m2)
{
	const u32 nr_stripes = m1->dss_count;
	u32 idx = 0;

	/* Differing stripe counts can never be an index-for-index match. */
	if (nr_stripes != m2->dss_count)
		return false;

	while (idx < nr_stripes) {
		if (!ff_dss_match_fh(&m1->dss[idx], &m2->dss[idx]))
			return false;
		idx++;
	}

	return true;
}

/*
 * Check whether two mirrors reference the same device ids, stripe by stripe.
 *
 * The mirrors must hold the same number of stripes, and the devid stored at
 * each stripe index of @m1 must be byte-for-byte equal to the devid at the
 * same index of @m2.
 *
 * Returns true when all device ids are equal (including the case of zero
 * stripes on both sides), false otherwise.
 */
static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
		const struct nfs4_ff_layout_mirror *m2)
{
	const u32 nr_stripes = m1->dss_count;
	u32 idx = 0;

	if (nr_stripes != m2->dss_count)
		return false;

	while (idx < nr_stripes) {
		/* Raw byte comparison of the two device id objects. */
		if (memcmp(&m1->dss[idx].devid, &m2->dss[idx].devid,
			   sizeof(m1->dss[idx].devid)))
			return false;
		idx++;
	}

	return true;
}

static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
		struct nfs4_ff_layout_mirror *mirror)
@@ -209,8 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,

	spin_lock(&inode->i_lock);
	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
		if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid,
			   sizeof(pos->dss[0].devid)) != 0)
		if (!ff_mirror_match_devid(mirror, pos))
			continue;
		if (!ff_mirror_match_fh(mirror, pos))
			continue;
@@ -241,13 +273,15 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
/*
 * Allocate a mirror object and initialise its lock, reference count (set to
 * 1) and list linkage.
 *
 * @gfp_flags: allocation flags handed to kzalloc().
 *
 * Returns the new mirror, or NULL if the allocation failed.
 */
static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
{
	struct nfs4_ff_layout_mirror *mirror;
	u32 dss_id;

	/* kzalloc() hands back zero-filled memory, so every field starts at 0/NULL. */
	mirror = kzalloc(sizeof(*mirror), gfp_flags);
	if (mirror != NULL) {
		spin_lock_init(&mirror->lock);
		refcount_set(&mirror->ref, 1);
		INIT_LIST_HEAD(&mirror->mirrors);
		/*
		 * NOTE(review): this listing carries both a single dss[0]
		 * initialisation and a per-stripe loop; they look like the
		 * pre- and post-change forms of the same step - confirm which
		 * one is meant to remain.
		 */
		nfs_localio_file_init(&mirror->dss[0].nfl);
		/*
		 * NOTE(review): nothing in this function assigns ->dss_count
		 * or ->dss after the zeroing allocation above, so as written
		 * this loop runs zero times and ->dss is still NULL here.
		 * ff_layout_alloc_lseg() appears to allocate the stripe array
		 * only after this function returns - verify that each stripe's
		 * ->nfl is initialised somewhere once that array exists.
		 */
		for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
			nfs_localio_file_init(&mirror->dss[dss_id].nfl);
	}
	return mirror;
}
@@ -255,17 +289,19 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
	const struct cred	*cred;
	int dss_id = 0;
	u32 dss_id;

	ff_layout_remove_mirror(mirror);

	for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
		kfree(mirror->dss[dss_id].fh_versions);
	nfs_close_local_fh(&mirror->dss[dss_id].nfl);
		cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
		put_cred(cred);
		cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
		put_cred(cred);
		nfs_close_local_fh(&mirror->dss[dss_id].nfl);
		nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
	}

	kfree(mirror->dss);
	kfree(mirror);
@@ -371,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
			free_me);
}

static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
{
	u32 dss_id, sum = 0;

	for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
		sum += mirror->dss[dss_id].efficiency;

	return sum;
}

static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
	int i, j;

	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
		for (j = i + 1; j < fls->mirror_array_cnt; j++)
			if (fls->mirror_array[i]->dss[0].efficiency <
			    fls->mirror_array[j]->dss[0].efficiency)
			if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
			    ff_mirror_efficiency_sum(fls->mirror_array[j]))
				swap(fls->mirror_array[i],
				     fls->mirror_array[j]);
	}
@@ -398,6 +444,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
	u32 mirror_array_cnt;
	__be32 *p;
	int i, rc;
	struct nfs4_ff_layout_ds_stripe *dss_info;

	dprintk("--> %s\n", __func__);
	scratch = folio_alloc(gfp_flags, 0);
@@ -440,17 +487,24 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		kuid_t uid;
		kgid_t gid;
		u32 fh_count, id;
		int j, dss_id = 0;
		int j, dss_id;

		rc = -EIO;
		p = xdr_inline_decode(&stream, 4);
		if (!p)
			goto out_err_free;

		// Ensure all mirrors have same stripe count.
		if (dss_count == 0)
			dss_count = be32_to_cpup(p);
		else if (dss_count != be32_to_cpup(p))
			goto out_err_free;

		/* FIXME: allow for striping? */
		if (dss_count != 1)
		if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
		    dss_count == 0)
			goto out_err_free;

		if (dss_count > 1 && stripe_unit == 0)
			goto out_err_free;

		fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
@@ -464,8 +518,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
		    kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
			    gfp_flags);

		for (dss_id = 0; dss_id < dss_count; dss_id++) {
			dss_info = &fls->mirror_array[i]->dss[dss_id];
			dss_info->mirror = fls->mirror_array[i];

			/* deviceid */
		rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid);
			rc = decode_deviceid(&stream, &dss_info->devid);
			if (rc)
				goto out_err_free;

@@ -474,10 +532,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
			p = xdr_inline_decode(&stream, 4);
			if (!p)
				goto out_err_free;
		fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p);
			dss_info->efficiency = be32_to_cpup(p);

			/* stateid */
		rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid);
			rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
			if (rc)
				goto out_err_free;

@@ -488,22 +546,22 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
				goto out_err_free;
			fh_count = be32_to_cpup(p);

		fls->mirror_array[i]->dss[dss_id].fh_versions =
			dss_info->fh_versions =
			    kcalloc(fh_count, sizeof(struct nfs_fh),
				    gfp_flags);
		if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) {
			if (dss_info->fh_versions == NULL) {
				rc = -ENOMEM;
				goto out_err_free;
			}

			for (j = 0; j < fh_count; j++) {
				rc = decode_nfs_fh(&stream,
					   &fls->mirror_array[i]->dss[dss_id].fh_versions[j]);
						   &dss_info->fh_versions[j]);
				if (rc)
					goto out_err_free;
			}

		fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count;
			dss_info->fh_versions_cnt = fh_count;

			/* user */
			rc = decode_name(&stream, &id);
@@ -523,6 +581,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
				kcred = prepare_kernel_cred(&init_task);
			else {
				unsigned int nofs_flags = memalloc_nofs_save();

				kcred = prepare_kernel_cred(&init_task);
				memalloc_nofs_restore(nofs_flags);
			}
@@ -534,21 +593,25 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
			cred = RCU_INITIALIZER(kcred);

			if (lgr->range.iomode == IOMODE_READ)
			rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
				rcu_assign_pointer(dss_info->ro_cred, cred);
			else
			rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
				rcu_assign_pointer(dss_info->rw_cred, cred);
		}

		mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
		if (mirror != fls->mirror_array[i]) {
			for (dss_id = 0; dss_id < dss_count; dss_id++) {
				dss_info = &fls->mirror_array[i]->dss[dss_id];
				/* swap cred ptrs so free_mirror will clean up old */
				if (lgr->range.iomode == IOMODE_READ) {
					cred = xchg(&mirror->dss[dss_id].ro_cred,
					    fls->mirror_array[i]->dss[dss_id].ro_cred);
				rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred);
						    dss_info->ro_cred);
					rcu_assign_pointer(dss_info->ro_cred, cred);
				} else {
					cred = xchg(&mirror->dss[dss_id].rw_cred,
					    fls->mirror_array[i]->dss[dss_id].rw_cred);
				rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred);
						    dss_info->rw_cred);
					rcu_assign_pointer(dss_info->rw_cred, cred);
				}
			}
			ff_layout_free_mirror(fls->mirror_array[i]);
			fls->mirror_array[i] = mirror;
+2 −0
Original line number Diff line number Diff line
@@ -21,6 +21,8 @@
 * due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096

#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096

/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
#define FF_LAYOUTSTATS_MAXDEV 4