Commit 4882ba78 authored by David Howells
Browse files

afs: Fix afs_server ref accounting



The current way that afs_server refs are accounted and cleaned up sometimes
causes rmmod to hang when it is waiting for cell records to be removed.  The
problem is that the cell cleanup might occasionally happen before the
server cleanup and then there's nothing that causes the cell to
garbage-collect the remaining servers as they become inactive.

Partially fix this by:

 (1) Give each afs_server record its own management timer rather than
     relying on the cell manager's central timer to drive each individual
     cell's maintenance work item to garbage collect servers.

     This timer is set when afs_unuse_server() reduces a server's activity
     count to zero and will schedule the server's destroyer work item upon
     firing.

 (2) Give each afs_server record its own destroyer work item that removes
     the record from the cell's database, shuts down the timer, cancels any
     pending work for itself, and sends an RPC to the server to cancel
     outstanding callbacks.

     This change, in combination with the timer, obviates the need to try
     and coordinate so closely between the cell record and a bunch of other
     server records to try and tear everything down in a coordinated
     fashion.  With this, the cell record is pinned until the server RCU is
     complete and namespace/module removal will wait until all the cell
     records are removed.

 (3) Now that incoming calls are mapped to servers (and thus cells) using
     data attached to an rxrpc_peer, the UUID-to-server mapping tree is
     moved from the namespace to the cell (cell->fs_servers).  This means
     there can no longer be duplicates therein - and that allows the
     mapping tree to be simpler as there doesn't need to be a chain of
     same-UUID servers that are in different cells.

 (4) The lock protecting the UUID mapping tree is switched to an
     rw_semaphore on the cell rather than a seqlock on the namespace as
     it's now only used during mounting in contexts in which we're allowed
     to sleep.

 (5) When it comes time for a cell that is being removed to purge its set
     of servers, it just needs to iterate over them and wake them up.  Once
     a server becomes inactive, its destroyer work item will observe the
     state of the cell and immediately remove that record.

 (6) When a server record is removed, it is marked AFS_SERVER_FL_EXPIRED to
     prevent reattempts at removal.  The record will be dispatched to RCU
     for destruction once its refcount reaches 0.

 (7) The AFS_SERVER_FL_UNCREATED/CREATING flags are used to synchronise
     simultaneous creation attempts.  If one attempt fails, it will abandon
     the attempt and allow another to try again.

     Note that the record can't just be abandoned when dead as it's bound
     into a server list attached to a volume and only subject to
     replacement if the server list obtained for the volume from the VLDB
     changes.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20250224234154.2014840-15-dhowells@redhat.com/ # v1
Link: https://lore.kernel.org/r/20250310094206.801057-11-dhowells@redhat.com/ # v4
parent 40e8b52f
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -169,7 +169,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
	INIT_HLIST_HEAD(&cell->proc_volumes);
	seqlock_init(&cell->volume_lock);
	cell->fs_servers = RB_ROOT;
	seqlock_init(&cell->fs_lock);
	init_rwsem(&cell->fs_lock);
	rwlock_init(&cell->vl_servers_lock);
	cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);

@@ -838,6 +838,7 @@ static void afs_manage_cell(struct afs_cell *cell)
	/* The root volume is pinning the cell */
	afs_put_volume(cell->root_volume, afs_volume_trace_put_cell_root);
	cell->root_volume = NULL;
	afs_purge_servers(cell);
	afs_put_cell(cell, afs_cell_trace_put_destroy);
}

+2 −2
Original line number Diff line number Diff line
@@ -1653,7 +1653,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, struct afs_server *server,
	bp = call->request;
	*bp++ = htonl(FSGIVEUPALLCALLBACKS);

	call->server = afs_use_server(server, afs_server_trace_use_give_up_cb);
	call->server = afs_use_server(server, false, afs_server_trace_use_give_up_cb);
	afs_make_call(call, GFP_NOFS);
	afs_wait_for_call_to_complete(call);
	ret = call->error;
@@ -1760,7 +1760,7 @@ bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server,
		return false;

	call->key	= key;
	call->server	= afs_use_server(server, afs_server_trace_use_get_caps);
	call->server	= afs_use_server(server, false, afs_server_trace_use_get_caps);
	call->peer	= rxrpc_kernel_get_peer(estate->addresses->addrs[addr_index].peer);
	call->probe	= afs_get_endpoint_state(estate, afs_estate_trace_get_getcaps);
	call->probe_index = addr_index;
+29 −25
Original line number Diff line number Diff line
@@ -302,18 +302,11 @@ struct afs_net {
	 * cell, but in practice, people create aliases and subsets and there's
	 * no easy way to distinguish them.
	 */
	seqlock_t		fs_lock;	/* For fs_servers, fs_probe_*, fs_proc */
	struct rb_root		fs_servers;	/* afs_server (by server UUID or address) */
	seqlock_t		fs_lock;	/* For fs_probe_*, fs_proc */
	struct list_head	fs_probe_fast;	/* List of afs_server to probe at 30s intervals */
	struct list_head	fs_probe_slow;	/* List of afs_server to probe at 5m intervals */
	struct hlist_head	fs_proc;	/* procfs servers list */

	struct hlist_head	fs_addresses;	/* afs_server (by lowest IPv6 addr) */
	seqlock_t		fs_addr_lock;	/* For fs_addresses[46] */

	struct work_struct	fs_manager;
	struct timer_list	fs_timer;

	struct work_struct	fs_prober;
	struct timer_list	fs_probe_timer;
	atomic_t		servers_outstanding;
@@ -409,7 +402,7 @@ struct afs_cell {

	/* Active fileserver interaction state. */
	struct rb_root		fs_servers;	/* afs_server (by server UUID) */
	seqlock_t		fs_lock;	/* For fs_servers  */
	struct rw_semaphore	fs_lock;	/* For fs_servers  */

	/* VL server list. */
	rwlock_t		vl_servers_lock; /* Lock on vl_servers */
@@ -544,22 +537,22 @@ struct afs_server {
	};

	struct afs_cell		*cell;		/* Cell to which belongs (pins ref) */
	struct rb_node		uuid_rb;	/* Link in net->fs_servers */
	struct afs_server __rcu	*uuid_next;	/* Next server with same UUID */
	struct afs_server	*uuid_prev;	/* Previous server with same UUID */
	struct list_head	probe_link;	/* Link in net->fs_probe_list */
	struct hlist_node	addr_link;	/* Link in net->fs_addresses6 */
	struct rb_node		uuid_rb;	/* Link in cell->fs_servers */
	struct list_head	probe_link;	/* Link in net->fs_probe_* */
	struct hlist_node	proc_link;	/* Link in net->fs_proc */
	struct list_head	volumes;	/* RCU list of afs_server_entry objects */
	struct afs_server	*gc_next;	/* Next server in manager's list */
	struct work_struct	destroyer;	/* Work item to try and destroy a server */
	struct timer_list	timer;		/* Management timer */
	time64_t		unuse_time;	/* Time at which last unused */
	unsigned long		flags;
#define AFS_SERVER_FL_RESPONDING 0		/* The server is responding */
#define AFS_SERVER_FL_UPDATING	1
#define AFS_SERVER_FL_NEEDS_UPDATE 2		/* Fileserver address list is out of date */
#define AFS_SERVER_FL_NOT_READY	4		/* The record is not ready for use */
#define AFS_SERVER_FL_NOT_FOUND	5		/* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL	6		/* Failed to access VL server */
#define AFS_SERVER_FL_UNCREATED	3		/* The record needs creating */
#define AFS_SERVER_FL_CREATING	4		/* The record is being created */
#define AFS_SERVER_FL_EXPIRED	5		/* The record has expired */
#define AFS_SERVER_FL_NOT_FOUND	6		/* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL	7		/* Failed to access VL server */
#define AFS_SERVER_FL_MAY_HAVE_CB 8		/* May have callbacks on this fileserver */
#define AFS_SERVER_FL_IS_YFS	16		/* Server is YFS not AFS */
#define AFS_SERVER_FL_NO_IBULK	17		/* Fileserver doesn't support FS.InlineBulkStatus */
@@ -569,6 +562,7 @@ struct afs_server {
	atomic_t		active;		/* Active user count */
	u32			addr_version;	/* Address list version */
	u16			service_id;	/* Service ID we're using. */
	short			create_error;	/* Creation error */
	unsigned int		rtt;		/* Server's current RTT in uS */
	unsigned int		debug_id;	/* Debugging ID for traces */

@@ -1513,19 +1507,29 @@ extern void __exit afs_clean_up_permit_cache(void);
extern spinlock_t afs_server_peer_lock;

struct afs_server *afs_find_server(const struct rxrpc_peer *peer);
extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *);
extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32);
extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace);
extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace);
extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace);
struct afs_server *afs_use_server(struct afs_server *server, bool activate,
				  enum afs_server_trace reason);
void afs_unuse_server(struct afs_net *net, struct afs_server *server,
		      enum afs_server_trace reason);
void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
			     enum afs_server_trace reason);
extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
void afs_purge_servers(struct afs_cell *cell);
extern void afs_fs_probe_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
void __net_exit afs_wait_for_servers(struct afs_net *net);
bool afs_check_server_record(struct afs_operation *op, struct afs_server *server, struct key *key);

/*
 * Note a server record in the trace log without altering its counters: the
 * current refcount and active count are sampled and handed, together with
 * the record's debug ID and the caller-supplied reason, to the afs_server
 * tracepoint.
 */
static inline void afs_see_server(struct afs_server *server, enum afs_server_trace trace)
{
	int ref_count = refcount_read(&server->ref);
	int active_count = atomic_read(&server->active);

	trace_afs_server(server->debug_id, ref_count, active_count, trace);
}

static inline void afs_inc_servers_outstanding(struct afs_net *net)
{
	atomic_inc(&net->servers_outstanding);
+2 −8
Original line number Diff line number Diff line
@@ -86,16 +86,10 @@ static int __net_init afs_net_init(struct net *net_ns)
	INIT_HLIST_HEAD(&net->proc_cells);

	seqlock_init(&net->fs_lock);
	net->fs_servers = RB_ROOT;
	INIT_LIST_HEAD(&net->fs_probe_fast);
	INIT_LIST_HEAD(&net->fs_probe_slow);
	INIT_HLIST_HEAD(&net->fs_proc);

	INIT_HLIST_HEAD(&net->fs_addresses);
	seqlock_init(&net->fs_addr_lock);

	INIT_WORK(&net->fs_manager, afs_manage_servers);
	timer_setup(&net->fs_timer, afs_servers_timer, 0);
	INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
	timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
	atomic_set(&net->servers_outstanding, 1);
@@ -131,7 +125,7 @@ static int __net_init afs_net_init(struct net *net_ns)
	net->live = false;
	afs_fs_probe_cleanup(net);
	afs_cell_purge(net);
	afs_purge_servers(net);
	afs_wait_for_servers(net);
error_cell_init:
	net->live = false;
	afs_proc_cleanup(net);
@@ -153,7 +147,7 @@ static void __net_exit afs_net_exit(struct net *net_ns)
	net->live = false;
	afs_fs_probe_cleanup(net);
	afs_cell_purge(net);
	afs_purge_servers(net);
	afs_wait_for_servers(net);
	afs_close_socket(net);
	afs_proc_cleanup(net);
	afs_put_sysnames(net->sysnames);
+248 −316

File changed.

Preview size limit exceeded, changes collapsed.

Loading