Commit e1b8b883 authored by Sidhartha Kumar's avatar Sidhartha Kumar Committed by Andrew Morton
Browse files

maple_tree: reset mas->index and mas->last on write retries

The following scenario can result in a race condition:

Consider a node with the following indices and values

	a<------->b<----------->c<--------->d
	    0xA        NULL          0xB

	CPU 1			  CPU 2
      ---------        		---------
	mas_set_range(a,b)
	mas_erase()
		-> range is expanded (a,c) because of null expansion

	mas_nomem()
	mas_unlock()
				mas_store_range(b,c,0xC)

The node now looks like:

	a<------->b<----------->c<--------->d
	    0xA        0xC          0xB

	mas_lock()
	mas_erase() <------ range of erase is still (a,c)

The node is now NULL from (a,c) but the write from CPU 2 should have been
retained and range (b,c) should still have 0xC as its value.  We can fix
this by re-intializing to the original index and last.  This does not need
a cc: Stable as there are no users of the maple tree which use internal
locking and this condition is only possible with internal locking.

Link: https://lkml.kernel.org/r/20240812190543.71967-1-sidhartha.kumar@oracle.com


Signed-off-by: default avatarSidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: default avatarLiam R. Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent c0f398c3
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -5451,14 +5451,19 @@ EXPORT_SYMBOL_GPL(mas_store);
 */
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
{
	unsigned long index = mas->index;
	unsigned long last = mas->last;
	MA_WR_STATE(wr_mas, mas, entry);

	mas_wr_store_setup(&wr_mas);
	trace_ma_write(__func__, mas, 0, entry);
retry:
	mas_wr_store_entry(&wr_mas);
	if (unlikely(mas_nomem(mas, gfp)))
	if (unlikely(mas_nomem(mas, gfp))) {
		if (!entry)
			__mas_set_range(mas, index, last);
		goto retry;
	}

	if (unlikely(mas_is_err(mas)))
		return xa_err(mas->node);
@@ -6245,23 +6250,26 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
void *mas_erase(struct ma_state *mas)
{
	void *entry;
	unsigned long index = mas->index;
	MA_WR_STATE(wr_mas, mas, NULL);

	if (!mas_is_active(mas) || !mas_is_start(mas))
		mas->status = ma_start;

	/* Retry unnecessary when holding the write lock. */
write_retry:
	entry = mas_state_walk(mas);
	if (!entry)
		return NULL;

write_retry:
	/* Must reset to ensure spanning writes of last slot are detected */
	mas_reset(mas);
	mas_wr_store_setup(&wr_mas);
	mas_wr_store_entry(&wr_mas);
	if (mas_nomem(mas, GFP_KERNEL))
	if (mas_nomem(mas, GFP_KERNEL)) {
		/* in case the range of entry changed when unlocked */
		mas->index = mas->last = index;
		goto write_retry;
	}

	return entry;
}