Commit 139e5616, authored by Johannes Weiner, committed by Linus Torvalds

lib: radix_tree: tree node interface

Make struct radix_tree_node part of the public interface and provide API
functions to create, look up, and delete whole nodes.  Refactor the
existing insert, look up, delete functions on top of these new node
primitives.

This will allow the VM to track and garbage collect page cache radix
tree nodes.

[sasha.levin@oracle.com: return correct error code on insertion failure]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a528910e
...@@ -60,6 +60,33 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) ...@@ -60,6 +60,33 @@ static inline int radix_tree_is_indirect_ptr(void *ptr)
#define RADIX_TREE_MAX_TAGS 3 #define RADIX_TREE_MAX_TAGS 3
#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
#endif
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
#define RADIX_TREE_TAG_LONGS \
((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
/*
* A node of the radix tree.  Each node holds RADIX_TREE_MAP_SIZE slots
* plus, for each of the RADIX_TREE_MAX_TAGS tags, a bitmap of
* RADIX_TREE_TAG_LONGS longs (enough to hold one bit per slot).
*/
struct radix_tree_node {
unsigned int height; /* Height from the bottom */
unsigned int count; /* Raised when a slot is filled, lowered when one is cleared */
/* The two members share storage: a node is either linked or being freed. */
union {
struct radix_tree_node *parent; /* Used when ascending tree */
struct rcu_head rcu_head; /* Used when freeing node */
};
void __rcu *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
RADIX_TREE_MAP_SHIFT))
/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
struct radix_tree_root { struct radix_tree_root {
unsigned int height; unsigned int height;
...@@ -101,6 +128,7 @@ do { \ ...@@ -101,6 +128,7 @@ do { \
* concurrently with other readers. * concurrently with other readers.
* *
* The notable exceptions to this rule are the following functions: * The notable exceptions to this rule are the following functions:
* __radix_tree_lookup
* radix_tree_lookup * radix_tree_lookup
* radix_tree_lookup_slot * radix_tree_lookup_slot
* radix_tree_tag_get * radix_tree_tag_get
...@@ -216,9 +244,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item) ...@@ -216,9 +244,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item)
rcu_assign_pointer(*pslot, item); rcu_assign_pointer(*pslot, item);
} }
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp);
void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node *node);
void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
void *radix_tree_delete(struct radix_tree_root *, unsigned long); void *radix_tree_delete(struct radix_tree_root *, unsigned long);
unsigned int unsigned int
......
...@@ -35,33 +35,6 @@ ...@@ -35,33 +35,6 @@
#include <linux/hardirq.h> /* in_interrupt() */ #include <linux/hardirq.h> /* in_interrupt() */
#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
#endif
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
#define RADIX_TREE_TAG_LONGS \
((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
struct radix_tree_node {
unsigned int height; /* Height from the bottom */
unsigned int count;
union {
struct radix_tree_node *parent; /* Used when ascending tree */
struct rcu_head rcu_head; /* Used when freeing node */
};
void __rcu *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
RADIX_TREE_MAP_SHIFT))
/* /*
* The height_to_maxindex array needs to be one deeper than the maximum * The height_to_maxindex array needs to be one deeper than the maximum
* path as height 0 holds only 1 entry. * path as height 0 holds only 1 entry.
...@@ -387,23 +360,28 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) ...@@ -387,23 +360,28 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
} }
/** /**
* radix_tree_insert - insert into a radix tree * __radix_tree_create - create a slot in a radix tree
* @root: radix tree root * @root: radix tree root
* @index: index key * @index: index key
* @item: item to insert * @nodep: returns node
* @slotp: returns slot
* *
* Insert an item into the radix tree at position @index. * Create, if necessary, and return the node and slot for an item
* at position @index in the radix tree @root.
*
* Until there is more than one item in the tree, no nodes are
* allocated and @root->rnode is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*
* Returns -ENOMEM, or 0 for success.
*/ */
int radix_tree_insert(struct radix_tree_root *root, int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
unsigned long index, void *item) struct radix_tree_node **nodep, void ***slotp)
{ {
struct radix_tree_node *node = NULL, *slot; struct radix_tree_node *node = NULL, *slot;
unsigned int height, shift; unsigned int height, shift, offset;
int offset;
int error; int error;
BUG_ON(radix_tree_is_indirect_ptr(item));
/* Make sure the tree is high enough. */ /* Make sure the tree is high enough. */
if (index > radix_tree_maxindex(root->height)) { if (index > radix_tree_maxindex(root->height)) {
error = radix_tree_extend(root, index); error = radix_tree_extend(root, index);
...@@ -439,16 +417,42 @@ int radix_tree_insert(struct radix_tree_root *root, ...@@ -439,16 +417,42 @@ int radix_tree_insert(struct radix_tree_root *root,
height--; height--;
} }
if (slot != NULL) if (nodep)
*nodep = node;
if (slotp)
*slotp = node ? node->slots + offset : (void **)&root->rnode;
return 0;
}
/**
* radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
int radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node;
void **slot;
int error;
BUG_ON(radix_tree_is_indirect_ptr(item));
error = __radix_tree_create(root, index, &node, &slot);
if (error)
return error;
if (*slot != NULL)
return -EEXIST; return -EEXIST;
rcu_assign_pointer(*slot, item);
if (node) { if (node) {
node->count++; node->count++;
rcu_assign_pointer(node->slots[offset], item); BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK));
BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK));
BUG_ON(tag_get(node, 1, offset));
} else { } else {
rcu_assign_pointer(root->rnode, item);
BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 0));
BUG_ON(root_tag_get(root, 1)); BUG_ON(root_tag_get(root, 1));
} }
...@@ -457,15 +461,26 @@ int radix_tree_insert(struct radix_tree_root *root, ...@@ -457,15 +461,26 @@ int radix_tree_insert(struct radix_tree_root *root,
} }
EXPORT_SYMBOL(radix_tree_insert); EXPORT_SYMBOL(radix_tree_insert);
/* /**
* is_slot == 1 : search for the slot. * __radix_tree_lookup - lookup an item in a radix tree
* is_slot == 0 : search for the node. * @root: radix tree root
* @index: index key
* @nodep: returns node
* @slotp: returns slot
*
* Lookup and return the item at position @index in the radix
* tree @root.
*
* Until there is more than one item in the tree, no nodes are
* allocated and @root->rnode is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*/ */
static void *radix_tree_lookup_element(struct radix_tree_root *root, void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
unsigned long index, int is_slot) struct radix_tree_node **nodep, void ***slotp)
{ {
struct radix_tree_node *node, *parent;
unsigned int height, shift; unsigned int height, shift;
struct radix_tree_node *node, **slot; void **slot;
node = rcu_dereference_raw(root->rnode); node = rcu_dereference_raw(root->rnode);
if (node == NULL) if (node == NULL)
...@@ -474,7 +489,12 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, ...@@ -474,7 +489,12 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
if (!radix_tree_is_indirect_ptr(node)) { if (!radix_tree_is_indirect_ptr(node)) {
if (index > 0) if (index > 0)
return NULL; return NULL;
return is_slot ? (void *)&root->rnode : node;
if (nodep)
*nodep = NULL;
if (slotp)
*slotp = (void **)&root->rnode;
return node;
} }
node = indirect_to_ptr(node); node = indirect_to_ptr(node);
...@@ -485,8 +505,8 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, ...@@ -485,8 +505,8 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
shift = (height-1) * RADIX_TREE_MAP_SHIFT; shift = (height-1) * RADIX_TREE_MAP_SHIFT;
do { do {
slot = (struct radix_tree_node **) parent = node;
(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK);
node = rcu_dereference_raw(*slot); node = rcu_dereference_raw(*slot);
if (node == NULL) if (node == NULL)
return NULL; return NULL;
...@@ -495,7 +515,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, ...@@ -495,7 +515,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
height--; height--;
} while (height > 0); } while (height > 0);
return is_slot ? (void *)slot : indirect_to_ptr(node); if (nodep)
*nodep = parent;
if (slotp)
*slotp = slot;
return node;
} }
/** /**
...@@ -513,7 +537,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, ...@@ -513,7 +537,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
*/ */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{ {
return (void **)radix_tree_lookup_element(root, index, 1); void **slot;
if (!__radix_tree_lookup(root, index, NULL, &slot))
return NULL;
return slot;
} }
EXPORT_SYMBOL(radix_tree_lookup_slot); EXPORT_SYMBOL(radix_tree_lookup_slot);
...@@ -531,7 +559,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); ...@@ -531,7 +559,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot);
*/ */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{ {
return radix_tree_lookup_element(root, index, 0); return __radix_tree_lookup(root, index, NULL, NULL);
} }
EXPORT_SYMBOL(radix_tree_lookup); EXPORT_SYMBOL(radix_tree_lookup);
...@@ -1261,6 +1289,56 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) ...@@ -1261,6 +1289,56 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
} }
} }
/**
* __radix_tree_delete_node - try to free node after clearing a slot
* @root: radix tree root
* @index: index key
* @node: node containing @index
*
* After clearing the slot at @index in @node from radix tree
* rooted at @root, call this function to attempt freeing the
* node and shrinking the tree.
*
* The caller is expected to have already cleared the slot and
* decremented @node->count; this function only inspects the count.
*
* Starting at @node, each node whose count is zero is unlinked from
* its parent (or from @root when it has no parent) and freed, and the
* walk continues with the parent.  The walk stops at the first node
* that still has a non-zero count; if that node is the one @root
* points to, radix_tree_shrink() is called on @root.
*
* Returns %true if at least one node was freed by the walk, or if the
* shrink left @root->height at 0; %false otherwise.
*/
bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node *node)
{
bool deleted = false;
do {
struct radix_tree_node *parent;
/* Node still in use: it cannot be freed, so the upward walk ends here. */
if (node->count) {
if (node == indirect_to_ptr(root->rnode)) {
radix_tree_shrink(root);
/*
* NOTE(review): presumably a height of 0 after shrinking
* means the shrink itself released the top node - confirm
* against radix_tree_shrink().
*/
if (root->height == 0)
deleted = true;
}
return deleted;
}
/* Node is empty: remove the reference to it before freeing it. */
parent = node->parent;
if (parent) {
/* Drop one level of index bits to get this node's offset in its parent. */
index >>= RADIX_TREE_MAP_SHIFT;
parent->slots[index & RADIX_TREE_MAP_MASK] = NULL;
parent->count--;
} else {
/* No parent: this was the top node, so reset the root to empty. */
root_tag_clear_all(root);
root->height = 0;
root->rnode = NULL;
}
radix_tree_node_free(node);
deleted = true;
/* The parent just lost a slot; re-check it on the next iteration. */
node = parent;
} while (node);
return deleted;
}
/** /**
* radix_tree_delete_item - delete an item from a radix tree * radix_tree_delete_item - delete an item from a radix tree
* @root: radix tree root * @root: radix tree root
...@@ -1275,43 +1353,26 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) ...@@ -1275,43 +1353,26 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
void *radix_tree_delete_item(struct radix_tree_root *root, void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item) unsigned long index, void *item)
{ {
struct radix_tree_node *node = NULL; struct radix_tree_node *node;
struct radix_tree_node *slot = NULL; unsigned int offset;
struct radix_tree_node *to_free; void **slot;
unsigned int height, shift; void *entry;
int tag; int tag;
int uninitialized_var(offset);
height = root->height; entry = __radix_tree_lookup(root, index, &node, &slot);
if (index > radix_tree_maxindex(height)) if (!entry)
goto out; return NULL;
slot = root->rnode; if (item && entry != item)
if (height == 0) { return NULL;
if (!node) {
root_tag_clear_all(root); root_tag_clear_all(root);
root->rnode = NULL; root->rnode = NULL;
goto out; return entry;
} }
slot = indirect_to_ptr(slot);
shift = height * RADIX_TREE_MAP_SHIFT;
do {
if (slot == NULL)
goto out;
shift -= RADIX_TREE_MAP_SHIFT;
offset = (index >> shift) & RADIX_TREE_MAP_MASK;
node = slot;
slot = slot->slots[offset];
} while (shift);
if (slot == NULL)
goto out;
if (item && slot != item) { offset = index & RADIX_TREE_MAP_MASK;
slot = NULL;
goto out;
}
/* /*
* Clear all tags associated with the item to be deleted. * Clear all tags associated with the item to be deleted.
...@@ -1322,40 +1383,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root, ...@@ -1322,40 +1383,12 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
radix_tree_tag_clear(root, index, tag); radix_tree_tag_clear(root, index, tag);
} }
to_free = NULL; node->slots[offset] = NULL;
/* Now free the nodes we do not need anymore */ node->count--;
while (node) {
node->slots[offset] = NULL;
node->count--;
/*
* Queue the node for deferred freeing after the
* last reference to it disappears (set NULL, above).
*/
if (to_free)
radix_tree_node_free(to_free);
if (node->count) {
if (node == indirect_to_ptr(root->rnode))
radix_tree_shrink(root);
goto out;
}
/* Node with zero slots in use so free it */
to_free = node;
index >>= RADIX_TREE_MAP_SHIFT;
offset = index & RADIX_TREE_MAP_MASK;
node = node->parent;
}
root_tag_clear_all(root); __radix_tree_delete_node(root, index, node);
root->height = 0;
root->rnode = NULL;
if (to_free)
radix_tree_node_free(to_free);
out: return entry;
return slot;
} }
EXPORT_SYMBOL(radix_tree_delete_item); EXPORT_SYMBOL(radix_tree_delete_item);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment