Commit 48db8923 authored by Chuck Lever
Browse files

NFSD: Defer sub-object cleanup in export put callbacks



svc_export_put() calls path_put() and auth_domain_put() immediately
when the last reference drops, before the RCU grace period. RCU
readers in e_show() and c_show() access both ex_path (via
seq_path/d_path) and ex_client->name (via seq_escape) without
holding a reference. If cache_clean removes the entry and drops the
last reference concurrently, the sub-objects are freed while still
in use, producing a NULL pointer dereference in d_path.

Commit 25307664 ("nfsd: fix UAF when access ex_uuid or
ex_stats") moved kfree of ex_uuid and ex_stats into the
call_rcu callback, but left path_put() and auth_domain_put() running
before the grace period because both may sleep and call_rcu
callbacks execute in softirq context.

Replace call_rcu/kfree_rcu with queue_rcu_work(), which defers the
callback until after the RCU grace period and executes it in process
context where sleeping is permitted. This allows path_put() and
auth_domain_put() to be moved into the deferred callback alongside
the other resource releases. Apply the same fix to expkey_put(),
which has the identical pattern with ek_path and ek_client.

A dedicated workqueue scopes the shutdown drain to only NFSD
export release work items; flushing the shared
system_unbound_wq would stall on unrelated work from other
subsystems. nfsd_export_shutdown() uses rcu_barrier() followed
by flush_workqueue() to ensure all deferred release callbacks
complete before the export caches are destroyed.

Reported-by: Misbah Anjum N <misanjum@linux.ibm.com>
Closes: https://lore.kernel.org/linux-nfs/dcd371d3a95815a84ba7de52cef447b8@linux.ibm.com/


Fixes: c224edca ("nfsd: no need get cache ref when protected by rcu")
Fixes: 1b10f0b6 ("SUNRPC: no need get cache ref when protected by rcu")
Cc: stable@vger.kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Tested-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 36441017
Loading
Loading
Loading
Loading
+54 −9
Original line number Diff line number Diff line
@@ -36,19 +36,30 @@
 * second map contains a reference to the entry in the first map.
 */

static struct workqueue_struct *nfsd_export_wq;

#define	EXPKEY_HASHBITS		8
#define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
#define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)

static void expkey_put(struct kref *ref)
static void expkey_release(struct work_struct *work)
{
	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
	struct svc_expkey *key = container_of(to_rcu_work(work),
					      struct svc_expkey, ek_rwork);

	if (test_bit(CACHE_VALID, &key->h.flags) &&
	    !test_bit(CACHE_NEGATIVE, &key->h.flags))
		path_put(&key->ek_path);
	auth_domain_put(key->ek_client);
	kfree_rcu(key, ek_rcu);
	kfree(key);
}

/*
 * kref release callback for an svc_expkey.
 *
 * Nothing is torn down here. The entry's embedded rcu_work is armed with
 * expkey_release() and queued on nfsd_export_wq, so the path, the auth
 * domain and the key itself are released only after an RCU grace period,
 * and from process context where path_put() and auth_domain_put() are
 * allowed to sleep.
 */
static void expkey_put(struct kref *ref)
{
	struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);

	INIT_RCU_WORK(&key->ek_rwork, expkey_release);
	queue_rcu_work(nfsd_export_wq, &key->ek_rwork);
}

static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -353,11 +364,13 @@ static void export_stats_destroy(struct export_stats *stats)
					    EXP_STATS_COUNTERS_NUM);
}

static void svc_export_release(struct rcu_head *rcu_head)
static void svc_export_release(struct work_struct *work)
{
	struct svc_export *exp = container_of(rcu_head, struct svc_export,
			ex_rcu);
	struct svc_export *exp = container_of(to_rcu_work(work),
					      struct svc_export, ex_rwork);

	path_put(&exp->ex_path);
	auth_domain_put(exp->ex_client);
	nfsd4_fslocs_free(&exp->ex_fslocs);
	export_stats_destroy(exp->ex_stats);
	kfree(exp->ex_stats);
@@ -369,9 +382,8 @@ static void svc_export_put(struct kref *ref)
{
	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);

	path_put(&exp->ex_path);
	auth_domain_put(exp->ex_client);
	call_rcu(&exp->ex_rcu, svc_export_release);
	INIT_RCU_WORK(&exp->ex_rwork, svc_export_release);
	queue_rcu_work(nfsd_export_wq, &exp->ex_rwork);
}

static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -1478,6 +1490,36 @@ const struct seq_operations nfs_exports_op = {
	.show	= e_show,
};

/**
 * nfsd_export_wq_init - allocate the export release workqueue
 *
 * Called once at module load. The workqueue runs deferred svc_export and
 * svc_expkey release work scheduled by queue_rcu_work() in the cache put
 * callbacks. It is allocated as an unbound workqueue (WQ_UNBOUND) named
 * "nfsd_export" and stored in the file-scope nfsd_export_wq pointer.
 *
 * On failure nfsd_export_wq is left NULL, so the caller must not go on to
 * register anything that could queue export release work.
 *
 * Return values:
 *   %0: workqueue allocated
 *   %-ENOMEM: allocation failed
 */
int nfsd_export_wq_init(void)
{
	nfsd_export_wq = alloc_workqueue("nfsd_export", WQ_UNBOUND, 0);
	if (!nfsd_export_wq)
		return -ENOMEM;
	return 0;
}

/**
 * nfsd_export_wq_shutdown - drain and free the export release workqueue
 *
 * Called once at module unload, and from the init_nfsd() error path once
 * the workqueue has been allocated. Per-namespace teardown in
 * nfsd_export_shutdown() has already drained all deferred work (it calls
 * rcu_barrier() and then flush_workqueue() on this queue), so no release
 * work items are expected to be pending when the queue is destroyed.
 */
void nfsd_export_wq_shutdown(void)
{
	destroy_workqueue(nfsd_export_wq);
}

/*
 * Initialize the exports module.
 */
@@ -1539,6 +1581,9 @@ nfsd_export_shutdown(struct net *net)

	cache_unregister_net(nn->svc_expkey_cache, net);
	cache_unregister_net(nn->svc_export_cache, net);
	/* Drain deferred export and expkey release work. */
	rcu_barrier();
	flush_workqueue(nfsd_export_wq);
	cache_destroy_net(nn->svc_expkey_cache, net);
	cache_destroy_net(nn->svc_export_cache, net);
	svcauth_unix_purge(net);
+5 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@

#include <linux/sunrpc/cache.h>
#include <linux/percpu_counter.h>
#include <linux/workqueue.h>
#include <uapi/linux/nfsd/export.h>
#include <linux/nfs4.h>

@@ -75,7 +76,7 @@ struct svc_export {
	u32			ex_layout_types;
	struct nfsd4_deviceid_map *ex_devid_map;
	struct cache_detail	*cd;
	struct rcu_head		ex_rcu;
	struct rcu_work		ex_rwork;
	unsigned long		ex_xprtsec_modes;
	struct export_stats	*ex_stats;
};
@@ -92,7 +93,7 @@ struct svc_expkey {
	u32			ek_fsid[6];

	struct path		ek_path;
	struct rcu_head		ek_rcu;
	struct rcu_work		ek_rwork;
};

#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
@@ -110,6 +111,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
/*
 * Function declarations
 */
int			nfsd_export_wq_init(void);
void			nfsd_export_wq_shutdown(void);
int			nfsd_export_init(struct net *);
void			nfsd_export_shutdown(struct net *);
void			nfsd_export_flush(struct net *);
+7 −1
Original line number Diff line number Diff line
@@ -2259,9 +2259,12 @@ static int __init init_nfsd(void)
	if (retval)
		goto out_free_pnfs;
	nfsd_lockd_init();	/* lockd->nfsd callbacks */
	retval = nfsd_export_wq_init();
	if (retval)
		goto out_free_lockd;
	retval = register_pernet_subsys(&nfsd_net_ops);
	if (retval < 0)
		goto out_free_lockd;
		goto out_free_export_wq;
	retval = register_cld_notifier();
	if (retval)
		goto out_free_subsys;
@@ -2290,6 +2293,8 @@ static int __init init_nfsd(void)
	unregister_cld_notifier();
out_free_subsys:
	unregister_pernet_subsys(&nfsd_net_ops);
out_free_export_wq:
	nfsd_export_wq_shutdown();
out_free_lockd:
	nfsd_lockd_shutdown();
	nfsd_drc_slab_free();
@@ -2310,6 +2315,7 @@ static void __exit exit_nfsd(void)
	nfsd4_destroy_laundry_wq();
	unregister_cld_notifier();
	unregister_pernet_subsys(&nfsd_net_ops);
	nfsd_export_wq_shutdown();
	nfsd_drc_slab_free();
	nfsd_lockd_shutdown();
	nfsd4_free_slabs();