Commit f4b109c6, authored by Johannes Weiner, committed by Linus Torvalds

lib: radix-tree: add entry deletion support to __radix_tree_replace()

Page cache shadow entry handling will be a lot simpler when it can use a
single generic replacement function for pages, shadow entries, and
emptying slots.

Make __radix_tree_replace() properly account insertions and deletions in
node->count and garbage collect nodes as they become empty.  Then
re-implement radix_tree_delete() on top of it.

Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 6d75f366
...@@ -538,6 +538,107 @@ static int radix_tree_extend(struct radix_tree_root *root, ...@@ -538,6 +538,107 @@ static int radix_tree_extend(struct radix_tree_root *root,
return maxshift + RADIX_TREE_MAP_SHIFT; return maxshift + RADIX_TREE_MAP_SHIFT;
} }
/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root: radix tree root
 *
 * While the root is an internal node whose only entry sits in slot 0,
 * make that entry the new root and free the old root node.
 *
 * Returns true if at least one node was removed from the top of the
 * tree, false if the tree was left as it was.
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
struct radix_tree_node *node = root->rnode;
struct radix_tree_node *child;
/* The root holds a plain entry (or nothing): no node left to remove. */
if (!radix_tree_is_internal_node(node))
break;
node = entry_to_node(node);
/*
 * If the candidate node has more than one child, or its child
 * is not at the leftmost slot, or the child is a multiorder
 * entry, we cannot shrink.
 */
if (node->count != 1)
break;
child = node->slots[0];
/* The single entry lives in some slot other than slot 0. */
if (!child)
break;
/* A non-node entry stored above the bottom level (node->shift != 0). */
if (!radix_tree_is_internal_node(child) && node->shift)
break;
/* The child is about to be installed as the root: it has no parent. */
if (radix_tree_is_internal_node(child))
entry_to_node(child)->parent = NULL;
/*
 * We don't need rcu_assign_pointer(), since we are simply
 * moving the node from one part of the tree to another: if it
 * was safe to dereference the old pointer to it
 * (node->slots[0]), it will be safe to dereference the new
 * one (root->rnode) as far as dependent read barriers go.
 */
root->rnode = child;
/*
 * We have a dilemma here. The node's slot[0] must not be
 * NULLed in case there are concurrent lookups expecting to
 * find the item. However if this was a bottom-level node,
 * then it may be subject to the slot pointer being visible
 * to callers dereferencing it. If the item corresponding to
 * slot[0] is subsequently deleted, these callers would expect
 * their slot to become empty sooner or later.
 *
 * For example, lockless pagecache will look up a slot, deref
 * the page pointer, and if the page has 0 refcount it means it
 * was concurrently deleted from pagecache so try the deref
 * again. Fortunately there is already a requirement for logic
 * to retry the entire slot lookup -- the indirect pointer
 * problem (replacing direct root node with an indirect pointer
 * also results in a stale slot). So store RADIX_TREE_RETRY in
 * the slot to force callers to retry.
 */
if (!radix_tree_is_internal_node(child))
node->slots[0] = RADIX_TREE_RETRY;
radix_tree_node_free(node);
shrunk = true;
}
return shrunk;
}
/**
 * delete_node - free a node and its ancestors once they become empty
 * @root: radix tree root
 * @node: node whose contents were just modified; may be NULL
 *
 * Walks from @node towards the root. Every node whose count has dropped
 * to zero is unlinked from its parent (or from @root if it has no
 * parent, in which case the root tags are cleared as well) and freed.
 * The walk stops at the first node that still holds entries; if that
 * node is the root node, the tree is shrunk.
 *
 * A NULL @node is accepted and treated as "nothing to do":
 * __radix_tree_replace() forwards its @node argument unconditionally,
 * and that argument is NULL when the replaced slot is root->rnode.
 *
 * Returns true if any node was freed or the tree was shrunk, false
 * otherwise.
 */
static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	/* Test before the first dereference so a NULL @node is harmless. */
	while (node) {
		struct radix_tree_node *parent;

		if (node->count) {
			/* Still populated: only the root node can be shrunk. */
			if (node == entry_to_node(root->rnode))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/* The empty node was the root: the tree is now empty. */
			root_tag_clear_all(root);
			root->rnode = NULL;
		}

		radix_tree_node_free(node);
		deleted = true;

		/* The parent just lost an entry; check whether it is empty too. */
		node = parent;
	}

	return deleted;
}
/** /**
* __radix_tree_create - create a slot in a radix tree * __radix_tree_create - create a slot in a radix tree
* @root: radix tree root * @root: radix tree root
...@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root, ...@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
bool warn_typeswitch) bool warn_typeswitch)
{ {
void *old = rcu_dereference_raw(*slot); void *old = rcu_dereference_raw(*slot);
int exceptional; int count, exceptional;
WARN_ON_ONCE(radix_tree_is_internal_node(item)); WARN_ON_ONCE(radix_tree_is_internal_node(item));
WARN_ON_ONCE(!!item - !!old);
count = !!item - !!old;
exceptional = !!radix_tree_exceptional_entry(item) - exceptional = !!radix_tree_exceptional_entry(item) -
!!radix_tree_exceptional_entry(old); !!radix_tree_exceptional_entry(old);
WARN_ON_ONCE(warn_typeswitch && exceptional); WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
if (node) if (node) {
node->count += count;
node->exceptional += exceptional; node->exceptional += exceptional;
}
rcu_assign_pointer(*slot, item); rcu_assign_pointer(*slot, item);
} }
...@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root, ...@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
void **slot, void *item) void **slot, void *item)
{ {
/* /*
* This function supports replacing exceptional entries, but * This function supports replacing exceptional entries and
* that needs accounting against the node unless the slot is * deleting entries, but that needs accounting against the
* root->rnode. * node unless the slot is root->rnode.
*/ */
replace_slot(root, node, slot, item, replace_slot(root, node, slot, item,
!node && slot != (void **)&root->rnode); !node && slot != (void **)&root->rnode);
delete_node(root, node);
} }
/** /**
...@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root, ...@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
* *
* NOTE: This cannot be used to switch between non-entries (empty slots), * NOTE: This cannot be used to switch between non-entries (empty slots),
* regular entries, and exceptional entries, as that requires accounting * regular entries, and exceptional entries, as that requires accounting
* inside the radix tree node. When switching from one type of entry to * inside the radix tree node. When switching from one type of entry or
* another, use __radix_tree_lookup() and __radix_tree_replace(). * deleting, use __radix_tree_lookup() and __radix_tree_replace().
*/ */
void radix_tree_replace_slot(struct radix_tree_root *root, void radix_tree_replace_slot(struct radix_tree_root *root,
void **slot, void *item) void **slot, void *item)
...@@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) ...@@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
} }
#endif /* CONFIG_SHMEM && CONFIG_SWAP */ #endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root: radix tree root
 *
 * While the root is an internal node whose only entry sits in slot 0,
 * make that entry the new root and free the old root node.
 *
 * Returns true if at least one node was removed from the top of the
 * tree, false if the tree was left as it was.
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
struct radix_tree_node *node = root->rnode;
struct radix_tree_node *child;
/* The root holds a plain entry (or nothing): no node left to remove. */
if (!radix_tree_is_internal_node(node))
break;
node = entry_to_node(node);
/*
 * If the candidate node has more than one child, or its child
 * is not at the leftmost slot, or the child is a multiorder
 * entry, we cannot shrink.
 */
if (node->count != 1)
break;
child = node->slots[0];
/* The single entry lives in some slot other than slot 0. */
if (!child)
break;
/* A non-node entry stored above the bottom level (node->shift != 0). */
if (!radix_tree_is_internal_node(child) && node->shift)
break;
/* The child is about to be installed as the root: it has no parent. */
if (radix_tree_is_internal_node(child))
entry_to_node(child)->parent = NULL;
/*
 * We don't need rcu_assign_pointer(), since we are simply
 * moving the node from one part of the tree to another: if it
 * was safe to dereference the old pointer to it
 * (node->slots[0]), it will be safe to dereference the new
 * one (root->rnode) as far as dependent read barriers go.
 */
root->rnode = child;
/*
 * We have a dilemma here. The node's slot[0] must not be
 * NULLed in case there are concurrent lookups expecting to
 * find the item. However if this was a bottom-level node,
 * then it may be subject to the slot pointer being visible
 * to callers dereferencing it. If the item corresponding to
 * slot[0] is subsequently deleted, these callers would expect
 * their slot to become empty sooner or later.
 *
 * For example, lockless pagecache will look up a slot, deref
 * the page pointer, and if the page has 0 refcount it means it
 * was concurrently deleted from pagecache so try the deref
 * again. Fortunately there is already a requirement for logic
 * to retry the entire slot lookup -- the indirect pointer
 * problem (replacing direct root node with an indirect pointer
 * also results in a stale slot). So store RADIX_TREE_RETRY in
 * the slot to force callers to retry.
 */
if (!radix_tree_is_internal_node(child))
node->slots[0] = RADIX_TREE_RETRY;
radix_tree_node_free(node);
shrunk = true;
}
return shrunk;
}
/** /**
* __radix_tree_delete_node - try to free node after clearing a slot * __radix_tree_delete_node - try to free node after clearing a slot
* @root: radix tree root * @root: radix tree root
...@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) ...@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
bool __radix_tree_delete_node(struct radix_tree_root *root, bool __radix_tree_delete_node(struct radix_tree_root *root,
struct radix_tree_node *node) struct radix_tree_node *node)
{ {
bool deleted = false; return delete_node(root, node);
do {
struct radix_tree_node *parent;
if (node->count) {
if (node == entry_to_node(root->rnode))
deleted |= radix_tree_shrink(root);
return deleted;
}
parent = node->parent;
if (parent) {
parent->slots[node->offset] = NULL;
parent->count--;
} else {
root_tag_clear_all(root);
root->rnode = NULL;
}
radix_tree_node_free(node);
deleted = true;
node = parent;
} while (node);
return deleted;
} }
static inline void delete_sibling_entries(struct radix_tree_node *node, static inline void delete_sibling_entries(struct radix_tree_node *node,
...@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, ...@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
node_tag_clear(root, node, tag, offset); node_tag_clear(root, node, tag, offset);
delete_sibling_entries(node, node_to_entry(slot), offset); delete_sibling_entries(node, node_to_entry(slot), offset);
node->slots[offset] = NULL; __radix_tree_replace(root, node, slot, NULL);
node->count--;
if (radix_tree_exceptional_entry(entry))
node->exceptional--;
__radix_tree_delete_node(root, node);
return entry; return entry;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment