Commit b5d7388f authored by Craig Gallek, committed by David S. Miller

bpf: Optimize lpm trie delete

Before the delete operation was added, this data structure maintained
an invariant that intermediate nodes were only present when necessary
to build the tree.  This patch updates the delete operation to reinstate
that invariant by removing unnecessary intermediate nodes after a node is
removed and thus keeping the tree structure at a minimal size.

Suggested-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d835b63c
...@@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) ...@@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
{ {
struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key; struct bpf_lpm_trie_key *key = _key;
struct lpm_trie_node __rcu **trim; struct lpm_trie_node __rcu **trim, **trim2;
struct lpm_trie_node *node; struct lpm_trie_node *node, *parent;
unsigned long irq_flags; unsigned long irq_flags;
unsigned int next_bit; unsigned int next_bit;
size_t matchlen = 0; size_t matchlen = 0;
...@@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) ...@@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
raw_spin_lock_irqsave(&trie->lock, irq_flags); raw_spin_lock_irqsave(&trie->lock, irq_flags);
/* Walk the tree looking for an exact key/length match and keeping /* Walk the tree looking for an exact key/length match and keeping
* track of where we could begin trimming the tree. The trim-point * track of the path we traverse. We will need to know the node
* is the sub-tree along the walk consisting of only single-child * we wish to delete, and the slot that points to the node we want
* intermediate nodes and ending at a leaf node that we want to * to delete. We may also need to know the nodes parent and the
* remove. * slot that contains it.
*/ */
trim = &trie->root; trim = &trie->root;
node = rcu_dereference_protected( trim2 = trim;
trie->root, lockdep_is_held(&trie->lock)); parent = NULL;
while (node) { while ((node = rcu_dereference_protected(
*trim, lockdep_is_held(&trie->lock)))) {
matchlen = longest_prefix_match(trie, node, key); matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen || if (node->prefixlen != matchlen ||
node->prefixlen == key->prefixlen) node->prefixlen == key->prefixlen)
break; break;
parent = node;
trim2 = trim;
next_bit = extract_bit(key->data, node->prefixlen); next_bit = extract_bit(key->data, node->prefixlen);
/* If we hit a node that has more than one child or is a valid
* prefix itself, do not remove it. Reset the root of the trim
* path to its descendant on our path.
*/
if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
(node->child[0] && node->child[1]))
trim = &node->child[next_bit]; trim = &node->child[next_bit];
node = rcu_dereference_protected(
node->child[next_bit], lockdep_is_held(&trie->lock));
} }
if (!node || node->prefixlen != key->prefixlen || if (!node || node->prefixlen != key->prefixlen ||
...@@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) ...@@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
trie->n_entries--; trie->n_entries--;
/* If the node we are removing is not a leaf node, simply mark it /* If the node we are removing has two children, simply mark it
* as intermediate and we are done. * as intermediate and we are done.
*/ */
if (rcu_access_pointer(node->child[0]) || if (rcu_access_pointer(node->child[0]) &&
rcu_access_pointer(node->child[1])) { rcu_access_pointer(node->child[1])) {
node->flags |= LPM_TREE_NODE_FLAG_IM; node->flags |= LPM_TREE_NODE_FLAG_IM;
goto out; goto out;
} }
/* trim should now point to the slot holding the start of a path from /* If the parent of the node we are about to delete is an intermediate
* zero or more intermediate nodes to our leaf node for deletion. * node, and the deleted node doesn't have any children, we can delete
* the intermediate parent as well and promote its other child
* up the tree. Doing this maintains the invariant that all
* intermediate nodes have exactly 2 children and that there are no
* unnecessary intermediate nodes in the tree.
*/ */
while ((node = rcu_dereference_protected( if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) &&
*trim, lockdep_is_held(&trie->lock)))) { !node->child[0] && !node->child[1]) {
RCU_INIT_POINTER(*trim, NULL); if (node == rcu_access_pointer(parent->child[0]))
trim = rcu_access_pointer(node->child[0]) ? rcu_assign_pointer(
&node->child[0] : *trim2, rcu_access_pointer(parent->child[1]));
&node->child[1]; else
rcu_assign_pointer(
*trim2, rcu_access_pointer(parent->child[0]));
kfree_rcu(parent, rcu);
kfree_rcu(node, rcu); kfree_rcu(node, rcu);
goto out;
} }
/* The node we are removing has either zero or one child. If there
* is a child, move it into the removed node's slot then delete
* the node. Otherwise just clear the slot and delete the node.
*/
if (node->child[0])
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0]));
else if (node->child[1])
rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
else
RCU_INIT_POINTER(*trim, NULL);
kfree_rcu(node, rcu);
out: out:
raw_spin_unlock_irqrestore(&trie->lock, irq_flags); raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment