Commit 0c734c5e authored by Jens Axboe's avatar Jens Axboe
Browse files

block: improve struct request_queue layout



It's clearly been a while since someone looked at this, so I gave it a
quick shot. There are few issues in here:

- Random bundling of members that are mostly read-only and often written
- Random holes that need not be there

This moves the most frequently used bits into cacheline 1 and 2, with
the 2nd one being more write intensive than the first one, which is
basically read-only.

Outside of making this work a bit more efficiently, it also reduces the
size of struct request_queue for my test setup from 864 bytes (spanning
14 cachelines!) to 832 bytes and 13 cachelines.

Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/d2b7b61c-4868-45c0-9060-4f9c73de9d7e@kernel.dk


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 6ef02df1
Loading
Loading
Loading
Loading
+46 −43
Original line number Diff line number Diff line
@@ -367,59 +367,51 @@ struct blk_independent_access_ranges {
};

struct request_queue {
	struct request		*last_merge;
	struct elevator_queue	*elevator;

	struct percpu_ref	q_usage_counter;
	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	struct blk_queue_stats	*stats;
	struct rq_qos		*rq_qos;
	struct mutex		rq_qos_mutex;
	struct elevator_queue	*elevator;

	const struct blk_mq_ops	*mq_ops;

	/* sw queues */
	struct blk_mq_ctx __percpu	*queue_ctx;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long		queue_flags;

	unsigned int		rq_timeout;

	unsigned int		queue_depth;

	refcount_t		refs;

	/* hw dispatch queues */
	struct xarray		hctx_table;
	unsigned int		nr_hw_queues;
	struct xarray		hctx_table;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void			*queuedata;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long		queue_flags;
	/*
	 * Number of contexts that have called blk_set_pm_only(). If this
	 * counter is above zero then only RQF_PM requests are processed.
	 */
	atomic_t		pm_only;
	struct percpu_ref	q_usage_counter;

	/*
	 * ida allocated id for this queue.  Used to index queues from
	 * ioctx.
	 */
	int			id;
	struct request		*last_merge;

	spinlock_t		queue_lock;

	struct gendisk		*disk;
	int			quiesce_depth;

	refcount_t		refs;
	struct gendisk		*disk;

	/*
	 * mq queue kobject
	 */
	struct kobject *mq_kobj;

	struct queue_limits	limits;

#ifdef  CONFIG_BLK_DEV_INTEGRITY
	struct blk_integrity integrity;
#endif	/* CONFIG_BLK_DEV_INTEGRITY */
@@ -430,24 +422,40 @@ struct request_queue {
#endif

	/*
	 * queue settings
	 * Number of contexts that have called blk_set_pm_only(). If this
	 * counter is above zero then only RQF_PM requests are processed.
	 */
	unsigned long		nr_requests;	/* Max # of requests */
	atomic_t		pm_only;

	struct blk_queue_stats	*stats;
	struct rq_qos		*rq_qos;
	struct mutex		rq_qos_mutex;

	/*
	 * ida allocated id for this queue.  Used to index queues from
	 * ioctx.
	 */
	int			id;

	unsigned int		dma_pad_mask;

	/*
	 * queue settings
	 */
	unsigned long		nr_requests;	/* Max # of requests */

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct blk_crypto_profile *crypto_profile;
	struct kobject *crypto_kobject;
#endif

	unsigned int		rq_timeout;

	struct timer_list	timeout;
	struct work_struct	timeout_work;

	atomic_t		nr_active_requests_shared_tags;

	unsigned int		required_elevator_features;

	struct blk_mq_tags	*sched_shared_tags;

	struct list_head	icq_list;
@@ -458,11 +466,12 @@ struct request_queue {
	struct mutex		blkcg_mutex;
#endif

	struct queue_limits	limits;
	int			node;

	unsigned int		required_elevator_features;
	spinlock_t		requeue_lock;
	struct list_head	requeue_list;
	struct delayed_work	requeue_work;

	int			node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace __rcu	*blk_trace;
#endif
@@ -472,10 +481,6 @@ struct request_queue {
	struct blk_flush_queue	*fq;
	struct list_head	flush_list;

	struct list_head	requeue_list;
	spinlock_t		requeue_lock;
	struct delayed_work	requeue_work;

	struct mutex		sysfs_lock;
	struct mutex		sysfs_dir_lock;

@@ -500,8 +505,6 @@ struct request_queue {
	 */
	struct mutex		mq_freeze_lock;

	int			quiesce_depth;

	struct blk_mq_tag_set	*tag_set;
	struct list_head	tag_set_list;