xarray: Fix early termination of xas_for_each_marked

xas_for_each_marked() is using entry == NULL as a termination condition
of the iteration. When xas_for_each_marked() is used protected only by
RCU, this can however race with xas_store(xas, NULL) in the following
way:

TASK1                                   TASK2
page_cache_delete()         	        find_get_pages_range_tag()
                                          xas_for_each_marked()
                                            xas_find_marked()
                                              off = xas_find_chunk()

  xas_store(&xas, NULL)
    xas_init_marks(&xas);
    ...
    rcu_assign_pointer(*slot, NULL);
                                              entry = xa_entry(off);

And thus xas_for_each_marked() terminates prematurely possibly leading
to missed entries in the iteration (translating to missing writeback of
some pages or a similar problem).

If we find a NULL entry that has been marked, skip it (unless we're trying
to allocate an entry).
Reported-by: default avatarJan Kara <jack@suse.cz>
CC: stable@vger.kernel.org
Fixes: ef8e5717 ("page cache: Convert delete_batch to XArray")
Signed-off-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
parent 34eee836
......@@ -1648,6 +1648,7 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max,
xa_mark_t mark)
{
struct xa_node *node = xas->xa_node;
void *entry;
unsigned int offset;
if (unlikely(xas_not_node(node) || node->shift))
......@@ -1659,7 +1660,10 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max,
return NULL;
if (offset == XA_CHUNK_SIZE)
return xas_find_marked(xas, max, mark);
return xa_entry(xas->xa, node, offset);
entry = xa_entry(xas->xa, node, offset);
if (!entry)
return xas_find_marked(xas, max, mark);
return entry;
}
/*
......
......@@ -1208,6 +1208,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
}
entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
continue;
if (!xa_is_node(entry))
return entry;
xas->xa_node = xa_to_node(entry);
......
......@@ -7,8 +7,8 @@ LDLIBS+= -lpthread -lurcu
TARGETS = main idr-test multiorder xarray
CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
regression4.o \
tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
iteration_check_2.o benchmark.o
ifndef SHIFT
SHIFT=3
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* iteration_check_2.c: Check that deleting a tagged entry doesn't cause
* an RCU walker to finish early.
* Copyright (c) 2020 Oracle
* Author: Matthew Wilcox <willy@infradead.org>
*/
#include <pthread.h>
#include "test.h"
static volatile bool test_complete;
static void *iterator(void *arg)
{
XA_STATE(xas, arg, 0);
void *entry;
rcu_register_thread();
while (!test_complete) {
xas_set(&xas, 0);
rcu_read_lock();
xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
;
rcu_read_unlock();
assert(xas.xa_index >= 100);
}
rcu_unregister_thread();
return NULL;
}
static void *throbber(void *arg)
{
struct xarray *xa = arg;
rcu_register_thread();
while (!test_complete) {
int i;
for (i = 0; i < 100; i++) {
xa_store(xa, i, xa_mk_value(i), GFP_KERNEL);
xa_set_mark(xa, i, XA_MARK_0);
}
for (i = 0; i < 100; i++)
xa_erase(xa, i);
}
rcu_unregister_thread();
return NULL;
}
void iteration_test2(unsigned test_duration)
{
pthread_t threads[2];
DEFINE_XARRAY(array);
int i;
printv(1, "Running iteration test 2 for %d seconds\n", test_duration);
test_complete = false;
xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL);
xa_set_mark(&array, 100, XA_MARK_0);
if (pthread_create(&threads[0], NULL, iterator, &array)) {
perror("create iterator thread");
exit(1);
}
if (pthread_create(&threads[1], NULL, throbber, &array)) {
perror("create throbber thread");
exit(1);
}
sleep(test_duration);
test_complete = true;
for (i = 0; i < 2; i++) {
if (pthread_join(threads[i], NULL)) {
perror("pthread_join");
exit(1);
}
}
xa_destroy(&array);
}
......@@ -311,6 +311,7 @@ int main(int argc, char **argv)
regression4_test();
iteration_test(0, 10 + 90 * long_run);
iteration_test(7, 10 + 90 * long_run);
iteration_test2(10 + 90 * long_run);
single_thread_tests(long_run);
/* Free any remaining preallocated nodes */
......
......@@ -34,6 +34,7 @@ void xarray_tests(void);
void tag_check(void);
void multiorder_checks(void);
void iteration_test(unsigned order, unsigned duration);
void iteration_test2(unsigned duration);
void benchmark(void);
void idr_checks(void);
void ida_tests(void);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment