Commit 41b73435 authored by Christian Brauner's avatar Christian Brauner
Browse files

Merge patch series "fs: add i_state helpers"

Christian Brauner <brauner@kernel.org> says:

I've recently looked for some free space in struct inode again because
of some exec kerfuffle we had and while my idea didn't turn into
anything I noticed that we often waste bytes when using wait bit
operations. So I set out to switch that to another mechanism that would
allow us to free up bytes. So this is an attempt to turn i_state from an
unsigned long into an u32 using the individual bytes of i_state as
addresses for the wait var event mechanism (Thanks to Linus for that idea.).

This survives LTP, xfstests on various filesystems, and will-it-scale.

* patches from https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org:
  inode: make i_state a u32
  inode: port __I_LRU_ISOLATING to var event
  inode: port __I_NEW to var event
  inode: port __I_SYNC to var event
  fs: reorder i_state bits
  fs: add i_state helpers

Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org


Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parents 88b1afbf 2b111edb
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -1644,14 +1644,16 @@ void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
				break;
			}
		} else if (clean_pass && this_pass_clean) {
			wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
			DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
			struct wait_bit_queue_entry wqe;
			struct wait_queue_head *wq_head;

			prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
			wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
			prepare_to_wait_event(wq_head, &wqe.wq_entry,
					      TASK_UNINTERRUPTIBLE);
			mutex_unlock(&c->vfs_inodes_lock);

			schedule();
			finish_wait(wq, &wait.wq_entry);
			finish_wait(wq_head, &wqe.wq_entry);
			goto again;
		}
	}
+6 −1
Original line number Diff line number Diff line
@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
	__d_instantiate(entry, inode);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	inode_wake_up_bit(inode, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
+29 −16
Original line number Diff line number Diff line
@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)

static void inode_sync_complete(struct inode *inode)
{
	assert_spin_locked(&inode->i_lock);

	inode->i_state &= ~I_SYNC;
	/* If inode is clean an unused, put it into LRU now... */
	inode_add_lru(inode);
	/* Waiters must see I_SYNC cleared before being woken up */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
	/* Called with inode->i_lock which ensures memory ordering. */
	inode_wake_up_bit(inode, __I_SYNC);
}

static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
 */
void inode_wait_for_writeback(struct inode *inode)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;

	assert_spin_locked(&inode->i_lock);

	if (!(inode->i_state & I_SYNC))
		return;

	lockdep_assert_held(&inode->i_lock);
	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	while (inode->i_state & I_SYNC) {
	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
	for (;;) {
		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
		/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
		if (!(inode->i_state & I_SYNC))
			break;
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait,
			      TASK_UNINTERRUPTIBLE);
		schedule();
		spin_lock(&inode->i_lock);
	}
	finish_wait(wq_head, &wqe.wq_entry);
}

/*
@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
static void inode_sleep_on_writeback(struct inode *inode)
	__releases(inode->i_lock)
{
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	int sleep;
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;
	bool sleep;

	assert_spin_locked(&inode->i_lock);

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	sleep = inode->i_state & I_SYNC;
	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
	/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
	sleep = !!(inode->i_state & I_SYNC);
	spin_unlock(&inode->i_lock);
	if (sleep)
		schedule();
	finish_wait(wqh, &wait);
	finish_wait(wq_head, &wqe.wq_entry);
}

/*
+54 −16
Original line number Diff line number Diff line
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
		inode->i_state |= I_REFERENCED;
}

struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit)
{
        void *bit_address;

        bit_address = inode_state_wait_address(inode, bit);
        init_wait_var_entry(wqe, bit_address, 0);
        return __var_waitqueue(bit_address);
}
EXPORT_SYMBOL(inode_bit_waitqueue);

/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
	inode->i_state &= ~I_LRU_ISOLATING;
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
	/* Called with inode->i_lock which ensures memory ordering. */
	inode_wake_up_bit(inode, __I_LRU_ISOLATING);
	spin_unlock(&inode->i_lock);
}

static void inode_wait_for_lru_isolating(struct inode *inode)
{
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;

	lockdep_assert_held(&inode->i_lock);
	if (inode->i_state & I_LRU_ISOLATING) {
		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
		wait_queue_head_t *wqh;
	if (!(inode->i_state & I_LRU_ISOLATING))
		return;

		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
	wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
	for (;;) {
		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
		/*
		 * Checking I_LRU_ISOLATING with inode->i_lock guarantees
		 * memory ordering.
		 */
		if (!(inode->i_state & I_LRU_ISOLATING))
			break;
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
		schedule();
		spin_lock(&inode->i_lock);
		WARN_ON(inode->i_state & I_LRU_ISOLATING);
	}
	finish_wait(wq_head, &wqe.wq_entry);
	WARN_ON(inode->i_state & I_LRU_ISOLATING);
}

/**
@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
	 * used as an indicator whether blocking on it is safe.
	 */
	spin_lock(&inode->i_lock);
	wake_up_bit(&inode->i_state, __I_NEW);
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	inode_wake_up_bit(inode, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	inode_wake_up_bit(inode, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
	inode_wake_up_bit(inode, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
 */
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;

	/*
	 * Handle racing against evict(), see that routine for more details.
@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
		return;
	}

	wq = bit_waitqueue(&inode->i_state, __I_NEW);
	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode->i_lock);
	rcu_read_unlock();
	if (is_inode_hash_locked)
		spin_unlock(&inode_hash_lock);
	schedule();
	finish_wait(wq, &wait.wq_entry);
	finish_wait(wq_head, &wqe.wq_entry);
	if (is_inode_hash_locked)
		spin_lock(&inode_hash_lock);
	rcu_read_lock();
+38 −18
Original line number Diff line number Diff line
@@ -681,7 +681,8 @@ struct inode {
#endif

	/* Misc */
	unsigned long		i_state;
	u32			i_state;
	/* 32-bit hole */
	struct rw_semaphore	i_rwsem;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
@@ -744,6 +745,21 @@ struct inode {
	void			*i_private; /* fs or device private pointer */
} __randomize_layout;

/*
 * Get bit address from inode->i_state to use with wait_var_event()
 * infrastructre.
 */
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))

struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit);

static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
{
	/* Caller is responsible for correct memory barriers. */
	wake_up_var(inode_state_wait_address(inode, bit));
}

struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);

static inline unsigned int i_blocksize(const struct inode *node)
@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 *			i_count.
 *
 * Q: What is the difference between I_WILL_FREE and I_FREEING?
 *
 * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
 * upon. There's one free address left.
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define __I_NEW			0
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define __I_SYNC		1
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_LRU_ISOLATING	2
#define I_LRU_ISOLATING		(1 << __I_LRU_ISOLATING)

#define I_DIRTY_SYNC		(1 << 3)
#define I_DIRTY_DATASYNC	(1 << 4)
#define I_DIRTY_PAGES		(1 << 5)
#define I_WILL_FREE		(1 << 6)
#define I_FREEING		(1 << 7)
#define I_CLEAR			(1 << 8)
#define I_REFERENCED		(1 << 9)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
#define I_WB_SWITCH		(1 << 13)
#define I_OVL_INUSE		(1 << 14)
#define I_CREATING		(1 << 15)
#define I_DONTCACHE		(1 << 16)
#define I_SYNC_QUEUED		(1 << 17)
#define I_PINNING_NETFS_WB	(1 << 18)
#define __I_LRU_ISOLATING	19
#define I_LRU_ISOLATING		(1 << __I_LRU_ISOLATING)
#define I_WB_SWITCH		(1 << 12)
#define I_OVL_INUSE		(1 << 13)
#define I_CREATING		(1 << 14)
#define I_DONTCACHE		(1 << 15)
#define I_SYNC_QUEUED		(1 << 16)
#define I_PINNING_NETFS_WB	(1 << 17)

#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
Loading