Commit 14a51045 authored by Al Viro's avatar Al Viro
Browse files

get rid of busy-waiting in shrink_dcache_tree()



If shrink_dcache_tree() runs into a potential victim that is already
dying, it must wait for that dentry to go away.  To avoid busy-waiting
we need some object to wait on and a way for dentry_unlist() to see that
we need to be notified.

The obvious place for the object to wait on would be on our stack frame.
We will store a pointer to that object (struct completion_list) in victim
dentry; if there's more than one thread wanting to wait for the same
dentry to finish dying, we'll have their instances linked into a list,
with reference in dentry pointing to the head of that list.

* new object - struct completion_list.  A pair of struct completion and
pointer to the next instance.  That's what shrink_dcache_tree() will wait
on if needed.

* add a new member (->waiters, opaque pointer to struct completion_list)
to struct dentry.  It is defined for negative live dentries that are
not in-lookup ones and it will remain NULL for almost all of them.

It does not conflict with ->d_rcu (defined for killed dentries), ->d_alias
(defined for positive dentries, all live) or ->d_in_lookup_hash (defined
for in-lookup dentries, all live negative).  That allows to colocate
all four members.

* make sure that all places where dentry enters the state where ->waiters
is defined (live, negative, not-in-lookup) initialize ->waiters to NULL.

* if select_collect2() runs into a dentry that is already dying, have
its caller insert a local instance of struct completion_list into the
head of the list hanging off dentry->waiters and wait for completion.

* if dentry_unlist() sees non-NULL ->waiters, have it carefully walk
through the completion_list instances in that list, calling complete()
for each.

For now struct completion_list is local to fs/dcache.c; it's obviously
dentry-agnostic, and it can be trivially lifted into linux/completion.h
if somebody finds a reason to do so...

Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent 5408c22b
Loading
Loading
Loading
Loading
+73 −6
Original line number Diff line number Diff line
@@ -456,6 +456,15 @@ static void dentry_unlink_inode(struct dentry * dentry)
	raw_write_seqcount_begin(&dentry->d_seq);
	__d_clear_type_and_inode(dentry);
	hlist_del_init(&dentry->d_alias);
	/*
	 * dentry becomes negative, so the space occupied by ->d_alias
	 * belongs to ->waiters now; we could use __hlist_del() instead
	 * of hlist_del_init(), if not for the stunt pulled by nfs
	 * dummy root dentries - positive dentry *not* included into
	 * the alias list of its inode.  Open-coding hlist_del_init()
	 * and removing zeroing would be too clumsy...
	 */
	dentry->waiters = NULL;
	raw_write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
@@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(d_drop);

struct completion_list {
	struct completion_list *next;
	struct completion completion;
};

/*
 *  shrink_dcache_tree() needs to be notified when dentry in process of
 *  being evicted finally gets unlisted.  Such dentries are
 *	already with negative ->d_count
 *	already negative
 *	already not in in-lookup hash
 *	reachable only via ->d_sib.
 *
 *  Use ->waiters for a single-linked list of struct completion_list of
 *  waiters.
 */
static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p)
{
	struct completion_list *v = dentry->waiters;
	init_completion(&p->completion);
	p->next = v;
	dentry->waiters = p;
}

static inline void d_complete_waiters(struct dentry *dentry)
{
	struct completion_list *v = dentry->waiters;
	if (unlikely(v)) {
		/* some shrink_dcache_tree() instances are waiting */
		dentry->waiters = NULL;
		while (v) {
			struct completion *r = &v->completion;
			v = v->next;
			complete(r);
		}
	}
}

static inline void dentry_unlist(struct dentry *dentry)
{
	struct dentry *next;
@@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry)
	 * attached to the dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	d_complete_waiters(dentry);
	if (unlikely(hlist_unhashed(&dentry->d_sib)))
		return;
	__hlist_del(&dentry->d_sib);
@@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
			return D_WALK_QUIT;
		}
		to_shrink_list(dentry, &data->dispose);
	} else if (dentry->d_lockref.count < 0) {
		rcu_read_lock();
		data->victim = dentry;
		return D_WALK_QUIT;
	}
	/*
	 * We can return to the caller if we have found some (this
@@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
		data.victim = NULL;
		d_walk(parent, &data, select_collect2);
		if (data.victim) {
			spin_lock(&data.victim->d_lock);
			if (!lock_for_kill(data.victim)) {
				spin_unlock(&data.victim->d_lock);
			struct dentry *v = data.victim;

			spin_lock(&v->d_lock);
			if (v->d_lockref.count < 0 &&
			    !(v->d_flags & DCACHE_DENTRY_KILLED)) {
				struct completion_list wait;
				// It's busy dying; have it notify us once
				// it becomes invisible to d_walk().
				d_add_waiter(v, &wait);
				spin_unlock(&v->d_lock);
				rcu_read_unlock();
				if (!list_empty(&data.dispose))
					shrink_dentry_list(&data.dispose);
				wait_for_completion(&wait.completion);
				continue;
			}
			if (!lock_for_kill(v)) {
				spin_unlock(&v->d_lock);
				rcu_read_unlock();
			} else {
				shrink_kill(data.victim);
				shrink_kill(v);
			}
		}
		if (!list_empty(&data.dispose))
@@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_HLIST_HEAD(&dentry->d_children);
	INIT_HLIST_NODE(&dentry->d_alias);
	dentry->waiters = NULL;
	INIT_HLIST_NODE(&dentry->d_sib);

	if (dentry->d_op && dentry->d_op->d_init) {
@@ -2729,7 +2796,7 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
	d_wait = dentry->d_wait;
	dentry->d_wait = NULL;
	hlist_bl_unlock(b);
	INIT_HLIST_NODE(&dentry->d_alias);
	dentry->waiters = NULL;
	INIT_LIST_HEAD(&dentry->d_lru);
	return d_wait;
}
+15 −3
Original line number Diff line number Diff line
@@ -88,6 +88,7 @@ union shortname_store {

#define d_lock	d_lockref.lock
#define d_iname d_shortname.string
struct completion_list;

struct dentry {
	/* RCU lookup touched fields */
@@ -122,12 +123,23 @@ struct dentry {
	struct hlist_node d_sib;	/* child of parent list */
	struct hlist_head d_children;	/* our children */
	/*
	 * d_alias and d_rcu can share memory
	 * the following members can share memory - their uses are
	 * mutually exclusive.
	 */
	union {
		struct hlist_node d_alias;	/* inode alias list */
		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
		/* positives: inode alias list */
		struct hlist_node d_alias;
		/* in-lookup ones (all negative, live): hash chain */
		struct hlist_bl_node d_in_lookup_hash;
		/* killed ones: (already negative) used to schedule freeing */
	 	struct rcu_head d_rcu;
		/*
		 * live non-in-lookup negatives: used if shrink_dcache_tree()
		 * races with eviction by another thread and needs to wait for
		 * this dentry to get killed .  Remains NULL for almost all
		 * negative dentries.
		 */
		struct completion_list *waiters;
	};
};