Commit 790e1fa8 authored by Liam R. Howlett, committed by Andrew Morton

maple_tree: add RCU lock checking to rcu callback functions

Dereferencing RCU objects within the RCU callback without the RCU check
has caused lockdep to complain.  Fix the RCU dereferencing by using the
RCU callback lock to ensure the operation is safe.

Also stop creating a new lock to use for dereferencing during destruction
of the tree or subtree.  Instead, pass through a pointer to the tree that
has the lock that is held for RCU dereferencing checking.  It also does
not make sense to use the maple state in the freeing scenario as the tree
walk is a special case where the tree no longer has the normal encodings
and parent pointers.

Link: https://lkml.kernel.org/r/20230227173632.3292573-8-surenb@google.com
Fixes: 54a611b6 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Suren Baghdasaryan <surenb@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 0a2b18d9
...@@ -824,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt, ...@@ -824,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,
return rcu_dereference_check(slots[offset], mt_locked(mt)); return rcu_dereference_check(slots[offset], mt_locked(mt));
} }
static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
unsigned char offset)
{
return rcu_dereference_protected(slots[offset], mt_locked(mt));
}
/* /*
* mas_slot_locked() - Get the slot value when holding the maple tree lock. * mas_slot_locked() - Get the slot value when holding the maple tree lock.
* @mas: The maple state * @mas: The maple state
...@@ -835,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt, ...@@ -835,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots, static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
unsigned char offset) unsigned char offset)
{ {
return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); return mt_slot_locked(mas->tree, slots, offset);
} }
/* /*
...@@ -907,34 +912,35 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt, ...@@ -907,34 +912,35 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
} }
/* /*
* mas_clear_meta() - clear the metadata information of a node, if it exists * mt_clear_meta() - clear the metadata information of a node, if it exists
* @mas: The maple state * @mt: The maple tree
* @mn: The maple node * @mn: The maple node
* @mt: The maple node type * @type: The maple node type
* @offset: The offset of the highest sub-gap in this node. * @offset: The offset of the highest sub-gap in this node.
* @end: The end of the data in this node. * @end: The end of the data in this node.
*/ */
static inline void mas_clear_meta(struct ma_state *mas, struct maple_node *mn, static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
enum maple_type mt) enum maple_type type)
{ {
struct maple_metadata *meta; struct maple_metadata *meta;
unsigned long *pivots; unsigned long *pivots;
void __rcu **slots; void __rcu **slots;
void *next; void *next;
switch (mt) { switch (type) {
case maple_range_64: case maple_range_64:
pivots = mn->mr64.pivot; pivots = mn->mr64.pivot;
if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
slots = mn->mr64.slot; slots = mn->mr64.slot;
next = mas_slot_locked(mas, slots, next = mt_slot_locked(mt, slots,
MAPLE_RANGE64_SLOTS - 1); MAPLE_RANGE64_SLOTS - 1);
if (unlikely((mte_to_node(next) && mte_node_type(next)))) if (unlikely((mte_to_node(next) &&
return; /* The last slot is a node, no metadata */ mte_node_type(next))))
return; /* no metadata, could be node */
} }
fallthrough; fallthrough;
case maple_arange_64: case maple_arange_64:
meta = ma_meta(mn, mt); meta = ma_meta(mn, type);
break; break;
default: default:
return; return;
...@@ -5483,7 +5489,7 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, ...@@ -5483,7 +5489,7 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
} }
/* /*
* mas_dead_leaves() - Mark all leaves of a node as dead. * mte_dead_leaves() - Mark all leaves of a node as dead.
* @mas: The maple state * @mas: The maple state
* @slots: Pointer to the slot array * @slots: Pointer to the slot array
* @type: The maple node type * @type: The maple node type
...@@ -5493,16 +5499,16 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min, ...@@ -5493,16 +5499,16 @@ static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
* Return: The number of leaves marked as dead. * Return: The number of leaves marked as dead.
*/ */
static inline static inline
unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
enum maple_type mt) void __rcu **slots)
{ {
struct maple_node *node; struct maple_node *node;
enum maple_type type; enum maple_type type;
void *entry; void *entry;
int offset; int offset;
for (offset = 0; offset < mt_slots[mt]; offset++) { for (offset = 0; offset < mt_slot_count(enode); offset++) {
entry = mas_slot_locked(mas, slots, offset); entry = mt_slot(mt, slots, offset);
type = mte_node_type(entry); type = mte_node_type(entry);
node = mte_to_node(entry); node = mte_to_node(entry);
/* Use both node and type to catch LE & BE metadata */ /* Use both node and type to catch LE & BE metadata */
...@@ -5517,162 +5523,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, ...@@ -5517,162 +5523,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots,
return offset; return offset;
} }
static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) /**
* mte_dead_walk() - Walk down a dead tree to just before the leaves
* @enode: The maple encoded node
* @offset: The starting offset
*
* Note: This can only be used from the RCU callback context.
*/
static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{ {
struct maple_node *next; struct maple_node *node, *next;
void __rcu **slots = NULL; void __rcu **slots = NULL;
next = mas_mn(mas); next = mte_to_node(*enode);
do { do {
mas->node = mt_mk_node(next, next->type); *enode = ma_enode_ptr(next);
slots = ma_slots(next, next->type); node = mte_to_node(*enode);
next = mas_slot_locked(mas, slots, offset); slots = ma_slots(node, node->type);
next = rcu_dereference_protected(slots[offset],
lock_is_held(&rcu_callback_map));
offset = 0; offset = 0;
} while (!ma_is_leaf(next->type)); } while (!ma_is_leaf(next->type));
return slots; return slots;
} }
/**
* mt_free_walk() - Walk & free a tree in the RCU callback context
* @head: The RCU head that's within the node.
*
* Note: This can only be used from the RCU callback context.
*/
static void mt_free_walk(struct rcu_head *head) static void mt_free_walk(struct rcu_head *head)
{ {
void __rcu **slots; void __rcu **slots;
struct maple_node *node, *start; struct maple_node *node, *start;
struct maple_tree mt; struct maple_enode *enode;
unsigned char offset; unsigned char offset;
enum maple_type type; enum maple_type type;
MA_STATE(mas, &mt, 0, 0);
node = container_of(head, struct maple_node, rcu); node = container_of(head, struct maple_node, rcu);
if (ma_is_leaf(node->type)) if (ma_is_leaf(node->type))
goto free_leaf; goto free_leaf;
mt_init_flags(&mt, node->ma_flags);
mas_lock(&mas);
start = node; start = node;
mas.node = mt_mk_node(node, node->type); enode = mt_mk_node(node, node->type);
slots = mas_dead_walk(&mas, 0); slots = mte_dead_walk(&enode, 0);
node = mas_mn(&mas); node = mte_to_node(enode);
do { do {
mt_free_bulk(node->slot_len, slots); mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1; offset = node->parent_slot + 1;
mas.node = node->piv_parent; enode = node->piv_parent;
if (mas_mn(&mas) == node) if (mte_to_node(enode) == node)
goto start_slots_free; goto free_leaf;
type = mte_node_type(mas.node); type = mte_node_type(enode);
slots = ma_slots(mte_to_node(mas.node), type); slots = ma_slots(mte_to_node(enode), type);
if ((offset < mt_slots[type]) && (slots[offset])) if ((offset < mt_slots[type]) &&
slots = mas_dead_walk(&mas, offset); rcu_dereference_protected(slots[offset],
lock_is_held(&rcu_callback_map)))
node = mas_mn(&mas); slots = mte_dead_walk(&enode, offset);
node = mte_to_node(enode);
} while ((node != start) || (node->slot_len < offset)); } while ((node != start) || (node->slot_len < offset));
slots = ma_slots(node, node->type); slots = ma_slots(node, node->type);
mt_free_bulk(node->slot_len, slots); mt_free_bulk(node->slot_len, slots);
start_slots_free:
mas_unlock(&mas);
free_leaf: free_leaf:
mt_free_rcu(&node->rcu); mt_free_rcu(&node->rcu);
} }
static inline void __rcu **mas_destroy_descend(struct ma_state *mas, static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
struct maple_enode *prev, unsigned char offset) struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{ {
struct maple_node *node; struct maple_node *node;
struct maple_enode *next = mas->node; struct maple_enode *next = *enode;
void __rcu **slots = NULL; void __rcu **slots = NULL;
enum maple_type type;
unsigned char next_offset = 0;
do { do {
mas->node = next; *enode = next;
node = mas_mn(mas); node = mte_to_node(*enode);
slots = ma_slots(node, mte_node_type(mas->node)); type = mte_node_type(*enode);
next = mas_slot_locked(mas, slots, 0); slots = ma_slots(node, type);
if ((mte_dead_node(next))) { next = mt_slot_locked(mt, slots, next_offset);
mte_to_node(next)->type = mte_node_type(next); if ((mte_dead_node(next)))
next = mas_slot_locked(mas, slots, 1); next = mt_slot_locked(mt, slots, ++next_offset);
}
mte_set_node_dead(mas->node); mte_set_node_dead(*enode);
node->type = mte_node_type(mas->node); node->type = type;
mas_clear_meta(mas, node, node->type);
node->piv_parent = prev; node->piv_parent = prev;
node->parent_slot = offset; node->parent_slot = offset;
offset = 0; offset = next_offset;
prev = mas->node; next_offset = 0;
prev = *enode;
} while (!mte_is_leaf(next)); } while (!mte_is_leaf(next));
return slots; return slots;
} }
static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
bool free) bool free)
{ {
void __rcu **slots; void __rcu **slots;
struct maple_node *node = mte_to_node(enode); struct maple_node *node = mte_to_node(enode);
struct maple_enode *start; struct maple_enode *start;
struct maple_tree mt;
MA_STATE(mas, &mt, 0, 0);
mas.node = enode;
if (mte_is_leaf(enode)) { if (mte_is_leaf(enode)) {
node->type = mte_node_type(enode); node->type = mte_node_type(enode);
goto free_leaf; goto free_leaf;
} }
ma_flags &= ~MT_FLAGS_LOCK_MASK;
mt_init_flags(&mt, ma_flags);
mas_lock(&mas);
mte_to_node(enode)->ma_flags = ma_flags;
start = enode; start = enode;
slots = mas_destroy_descend(&mas, start, 0); slots = mte_destroy_descend(&enode, mt, start, 0);
node = mas_mn(&mas); node = mte_to_node(enode); // Updated in the above call.
do { do {
enum maple_type type; enum maple_type type;
unsigned char offset; unsigned char offset;
struct maple_enode *parent, *tmp; struct maple_enode *parent, *tmp;
node->type = mte_node_type(mas.node); node->slot_len = mte_dead_leaves(enode, mt, slots);
node->slot_len = mas_dead_leaves(&mas, slots, node->type);
if (free) if (free)
mt_free_bulk(node->slot_len, slots); mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1; offset = node->parent_slot + 1;
mas.node = node->piv_parent; enode = node->piv_parent;
if (mas_mn(&mas) == node) if (mte_to_node(enode) == node)
goto start_slots_free; goto free_leaf;
type = mte_node_type(mas.node); type = mte_node_type(enode);
slots = ma_slots(mte_to_node(mas.node), type); slots = ma_slots(mte_to_node(enode), type);
if (offset >= mt_slots[type]) if (offset >= mt_slots[type])
goto next; goto next;
tmp = mas_slot_locked(&mas, slots, offset); tmp = mt_slot_locked(mt, slots, offset);
if (mte_node_type(tmp) && mte_to_node(tmp)) { if (mte_node_type(tmp) && mte_to_node(tmp)) {
parent = mas.node; parent = enode;
mas.node = tmp; enode = tmp;
slots = mas_destroy_descend(&mas, parent, offset); slots = mte_destroy_descend(&enode, mt, parent, offset);
} }
next: next:
node = mas_mn(&mas); node = mte_to_node(enode);
} while (start != mas.node); } while (start != enode);
node = mas_mn(&mas); node = mte_to_node(enode);
node->type = mte_node_type(mas.node); node->slot_len = mte_dead_leaves(enode, mt, slots);
node->slot_len = mas_dead_leaves(&mas, slots, node->type);
if (free) if (free)
mt_free_bulk(node->slot_len, slots); mt_free_bulk(node->slot_len, slots);
start_slots_free:
mas_unlock(&mas);
free_leaf: free_leaf:
if (free) if (free)
mt_free_rcu(&node->rcu); mt_free_rcu(&node->rcu);
else else
mas_clear_meta(&mas, node, node->type); mt_clear_meta(mt, node, node->type);
} }
/* /*
...@@ -5688,10 +5692,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode, ...@@ -5688,10 +5692,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
struct maple_node *node = mte_to_node(enode); struct maple_node *node = mte_to_node(enode);
if (mt_in_rcu(mt)) { if (mt_in_rcu(mt)) {
mt_destroy_walk(enode, mt->ma_flags, false); mt_destroy_walk(enode, mt, false);
call_rcu(&node->rcu, mt_free_walk); call_rcu(&node->rcu, mt_free_walk);
} else { } else {
mt_destroy_walk(enode, mt->ma_flags, true); mt_destroy_walk(enode, mt, true);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment