Commit 0f6439f6, authored by Joanne Koong, committed by Miklos Szeredi
Browse files

fuse: add kernel-enforced timeout option for requests



There are situations where fuse servers can become unresponsive or
stuck, for example if the server is deadlocked. Currently, there's no
good way to detect if a server is stuck and needs to be killed manually.

This commit adds an option for enforcing a timeout (in seconds) for
requests: if the timeout elapses without the server responding to the
request, the connection is automatically aborted.

Please note that these timeouts are not 100% precise. For example, a
request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
the requested timeout, because expiry is only checked periodically in
order to mitigate overhead.

[SzM: Bump the API version number]

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent eef36cf6
Loading
Loading
Loading
Loading
+101 −0
Original line number Diff line number Diff line
@@ -32,6 +32,103 @@ MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

/* Frequency (in seconds) of request timeout checks, if opted into */
#define FUSE_TIMEOUT_TIMER_FREQ 15

/*
 * The same check interval converted to jiffies. This is the delay passed to
 * queue_delayed_work() when the timeout check work re-queues itself.
 */
const unsigned long fuse_timeout_timer_freq =
	secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ);

/*
 * Report whether the request at the head of @list has been outstanding for
 * longer than the connection's request timeout.
 *
 * Only the first entry is examined. An empty list is never considered expired.
 * Both create_time and the timeout are expressed in jiffies.
 */
bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
{
	struct fuse_req *head = list_first_entry_or_null(list, struct fuse_req, list);
	unsigned long deadline;

	if (!head)
		return false;

	/* Point in time after which the head request counts as timed out */
	deadline = head->create_time + fc->timeout.req_timeout;

	return time_is_before_jiffies(deadline);
}

bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
{
	int i;

	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		if (fuse_request_expired(fc, &processing[i]))
			return true;

	return false;
}

/*
 * Delayed-work handler that looks for requests which have outlived the
 * connection's request timeout. The lists inspected are, in order:
 * - the fiq pending list
 * - the bg queue
 * - the fpq io and processing lists of every device on the connection
 * - the io-uring queues, via fuse_uring_request_expired()
 *
 * To keep this cheap only the head request of each list is examined, since
 * requests are generally queued in order of creation time (newer requests go
 * to the tail). A few edge cases may be missed (eg requests transitioning
 * between lists, or a re-sent request at the head of the pending list that is
 * newer than the requests behind it), which is acceptable: a request that is
 * never fulfilled will be caught by a later run.
 *
 * If an expired request is found the connection is aborted. If the connection
 * is found to be no longer connected, the handler returns without re-queueing
 * itself. Otherwise the work is queued again to run after
 * fuse_timeout_timer_freq jiffies.
 */
void fuse_check_timeout(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
					    timeout.work);
	struct fuse_iqueue *fiq = &fc->iq;
	struct fuse_dev *fud;
	bool expired;

	/* No requests are waiting, so none can have timed out */
	if (atomic_read(&fc->num_waiting) == 0)
		goto rearm;

	spin_lock(&fiq->lock);
	expired = fuse_request_expired(fc, &fiq->pending);
	spin_unlock(&fiq->lock);
	if (expired)
		goto abort;

	spin_lock(&fc->bg_lock);
	expired = fuse_request_expired(fc, &fc->bg_queue);
	spin_unlock(&fc->bg_lock);
	if (expired)
		goto abort;

	spin_lock(&fc->lock);
	if (!fc->connected) {
		/* Connection is already down: do not re-queue the check */
		spin_unlock(&fc->lock);
		return;
	}
	/* 'expired' is false at this point, so an empty device list falls through */
	list_for_each_entry(fud, &fc->devices, entry) {
		struct fuse_pqueue *fpq = &fud->pq;

		spin_lock(&fpq->lock);
		expired = fuse_request_expired(fc, &fpq->io) ||
			  fuse_fpq_processing_expired(fc, fpq->processing);
		spin_unlock(&fpq->lock);

		if (expired)
			break;
	}
	spin_unlock(&fc->lock);
	if (expired)
		goto abort;

	if (fuse_uring_request_expired(fc))
		goto abort;

rearm:
	queue_delayed_work(system_wq, &fc->timeout.work,
			   fuse_timeout_timer_freq);
	return;

abort:
	fuse_abort_conn(fc);
}

static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
{
	INIT_LIST_HEAD(&req->list);
@@ -40,6 +137,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
	refcount_set(&req->count, 1);
	__set_bit(FR_PENDING, &req->flags);
	req->fm = fm;
	req->create_time = jiffies;
}

static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
@@ -2291,6 +2389,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
		LIST_HEAD(to_end);
		unsigned int i;

		if (fc->timeout.req_timeout)
			cancel_delayed_work(&fc->timeout.work);

		/* Background queuing checks fc->connected under bg_lock */
		spin_lock(&fc->bg_lock);
		fc->connected = 0;
+27 −0
Original line number Diff line number Diff line
@@ -140,6 +140,33 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
	}
}

/*
 * Check the io-uring queues of a connection for timed out requests.
 *
 * For every allocated queue of the ring, the head of the request queue, the
 * head of the background request queue and the heads of the processing lists
 * are tested under the queue lock.
 *
 * Returns true if any of them has expired, false otherwise (including when
 * the connection has no ring).
 */
bool fuse_uring_request_expired(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
	int qid;

	if (!ring)
		return false;

	for (qid = 0; qid < ring->nr_queues; qid++) {
		struct fuse_ring_queue *queue = READ_ONCE(ring->queues[qid]);
		bool expired;

		/* Slot without a queue: nothing to inspect */
		if (!queue)
			continue;

		spin_lock(&queue->lock);
		expired = fuse_request_expired(fc, &queue->fuse_req_queue) ||
			  fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
			  fuse_fpq_processing_expired(fc, queue->fpq.processing);
		spin_unlock(&queue->lock);

		if (expired)
			return true;
	}

	return false;
}

void fuse_uring_destruct(struct fuse_conn *fc)
{
	struct fuse_ring *ring = fc->ring;
+6 −0
Original line number Diff line number Diff line
@@ -143,6 +143,7 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req);
bool fuse_uring_queue_bq_req(struct fuse_req *req);
bool fuse_uring_remove_pending_req(struct fuse_req *req);
/*
 * Returns true if the head request of any list on the connection's ring
 * queues has exceeded the request timeout; false otherwise or if there is no
 * ring.
 */
bool fuse_uring_request_expired(struct fuse_conn *fc);

static inline void fuse_uring_abort(struct fuse_conn *fc)
{
@@ -200,6 +201,11 @@ static inline bool fuse_uring_remove_pending_req(struct fuse_req *req)
	return false;
}

/*
 * Stub variant: there are no ring queues to inspect, so no ring request is
 * ever reported as expired.
 * NOTE(review): presumably the !CONFIG_FUSE_IO_URING branch; the opening
 * #if/#else is outside this view - confirm.
 */
static inline bool fuse_uring_request_expired(struct fuse_conn *fc)
{
	return false;
}

#endif /* CONFIG_FUSE_IO_URING */

#endif /* _FS_FUSE_DEV_URING_I_H */
+3 −0
Original line number Diff line number Diff line
@@ -63,5 +63,8 @@ void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req);
bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock);

/*
 * Has the first request on @list outlived @fc's request timeout?
 * Returns false for an empty list.
 */
bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list);
/*
 * Does any of the FUSE_PQ_HASH_SIZE lists in @processing have an expired
 * head request?
 */
bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing);

#endif
+17 −0
Original line number Diff line number Diff line
@@ -44,6 +44,9 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5

/**
 * Frequency (in jiffies) of request timeout checks, if opted into.
 * Used as the delay when the timeout check work is queued.
 */
extern const unsigned long fuse_timeout_timer_freq;

/** Maximum of max_pages received in init_out */
extern unsigned int fuse_max_pages_limit;

@@ -445,6 +448,8 @@ struct fuse_req {
	void *ring_entry;
	void *ring_queue;
#endif
	/** When (in jiffies) the request was created */
	unsigned long create_time;
};

struct fuse_iqueue;
@@ -941,6 +946,15 @@ struct fuse_conn {
	/**  uring connection information*/
	struct fuse_ring *ring;
#endif

	/** Only used if the connection opts into request timeouts */
	struct {
		/* Worker for checking if any requests have timed out */
		struct delayed_work work;

		/* Request timeout (in jiffies). 0 = no timeout */
		unsigned int req_timeout;
	} timeout;
};

/*
@@ -1222,6 +1236,9 @@ void fuse_request_end(struct fuse_req *req);
void fuse_abort_conn(struct fuse_conn *fc);
void fuse_wait_aborted(struct fuse_conn *fc);

/*
 * Check if any requests timed out.
 *
 * Delayed-work handler embedded in fuse_conn as timeout.work: aborts the
 * connection if an expired request is found, otherwise re-queues itself
 * while the connection is still connected.
 */
void fuse_check_timeout(struct work_struct *work);

/**
 * Invalidate inode attributes
 */
Loading