Commit e495f78d authored by David S. Miller

Merge branch 'fib_trie-next'

Alexander Duyck says:

====================
fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%

These patches are meant to address several performance issues I have seen
in the fib_trie implementation, and in fib_table_lookup specifically.  With
these changes in place I have seen a reduction of 35 to 75% in the
total time spent in fib_table_lookup, depending on the type of search being
performed.

On a VM running on my Core i7-4930K system with a trie with a maximum depth
of 7, this resulted in a reduction of over 370ns per packet in the total
time to process packets received from an ixgbe interface and route them to
a dummy interface.  This represents a failed lookup in the local trie
followed by a successful search in the main trie.

				Baseline	Refactor
  ixgbe->dummy routing		1.20Mpps	2.21Mpps
  ------------------------------------------------------------
  processing time per packet		835ns		453ns
  fib_table_lookup		50.1%	418ns	25.0%	113ns
  check_leaf.isra.9		 7.9%	 66ns	   --	 --
  ixgbe_clean_rx_irq		 5.3%	 44ns	 9.8%	 44ns
  ip_route_input_noref		 2.9%	 25ns	 4.6%	 21ns
  pvclock_clocksource_read	 2.6%	 21ns	 4.6%	 21ns
  ip_rcv			 2.6%	 22ns	 4.0%	 18ns

In the simple case of receiving a frame and dropping it before it can reach
the socket layer I saw a reduction of 40ns per packet.  This represents a
trip through the local trie with the correct leaf found with no need for
any backtracing.

				Baseline	Refactor
  ixgbe->local receive		2.65Mpps	2.96Mpps
  ------------------------------------------------------------
  processing time per packet		377ns		337ns
  fib_table_lookup		25.1%	 95ns	25.8%	 87ns
  ixgbe_clean_rx_irq		 8.7%	 33ns	 9.0%	 30ns
  check_leaf.isra.9		 7.2%	 27ns	   --	 --
  ip_rcv			 5.7%	 21ns	 6.5%	 22ns

These changes have resulted in several functions, such as check_leaf and
fib_find_node, being inlined, but due to the code simplification the
overall size of the code has been reduced.

   text	   data	    bss	    dec	    hex	filename
  16932	    376	     16	  17324	   43ac	net/ipv4/fib_trie.o - before
  15259	    376	      8	  15643	   3d1b	net/ipv4/fib_trie.o - after

Changes since RFC:
  Replaced this_cpu_ptr with correct call to this_cpu_inc in patch 1
  Changed test for leaf_info mismatch to (key ^ n->key) & li->mask_plen in patch 10
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bec94d43 5405afd1
...@@ -222,16 +222,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) ...@@ -222,16 +222,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id)
static inline int fib_lookup(struct net *net, const struct flowi4 *flp, static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
struct fib_result *res) struct fib_result *res)
{ {
struct fib_table *table; int err = -ENETUNREACH;
rcu_read_lock();
if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res,
FIB_LOOKUP_NOREF) ||
!fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res,
FIB_LOOKUP_NOREF))
err = 0;
table = fib_get_table(net, RT_TABLE_LOCAL); rcu_read_unlock();
if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
return 0;
table = fib_get_table(net, RT_TABLE_MAIN); return err;
if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF))
return 0;
return -ENETUNREACH;
} }
#else /* CONFIG_IP_MULTIPLE_TABLES */ #else /* CONFIG_IP_MULTIPLE_TABLES */
...@@ -247,20 +250,25 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, ...@@ -247,20 +250,25 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res) struct fib_result *res)
{ {
if (!net->ipv4.fib_has_custom_rules) { if (!net->ipv4.fib_has_custom_rules) {
int err = -ENETUNREACH;
rcu_read_lock();
res->tclassid = 0; res->tclassid = 0;
if (net->ipv4.fib_local && if ((net->ipv4.fib_local &&
!fib_table_lookup(net->ipv4.fib_local, flp, res, !fib_table_lookup(net->ipv4.fib_local, flp, res,
FIB_LOOKUP_NOREF)) FIB_LOOKUP_NOREF)) ||
return 0; (net->ipv4.fib_main &&
if (net->ipv4.fib_main && !fib_table_lookup(net->ipv4.fib_main, flp, res,
!fib_table_lookup(net->ipv4.fib_main, flp, res, FIB_LOOKUP_NOREF)) ||
FIB_LOOKUP_NOREF)) (net->ipv4.fib_default &&
return 0; !fib_table_lookup(net->ipv4.fib_default, flp, res,
if (net->ipv4.fib_default && FIB_LOOKUP_NOREF)))
!fib_table_lookup(net->ipv4.fib_default, flp, res, err = 0;
FIB_LOOKUP_NOREF))
return 0; rcu_read_unlock();
return -ENETUNREACH;
return err;
} }
return __fib_lookup(net, flp, res); return __fib_lookup(net, flp, res);
} }
......
...@@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net) ...@@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net)
return 0; return 0;
fail: fail:
kfree(local_table); fib_free_table(local_table);
return -ENOMEM; return -ENOMEM;
} }
#else #else
...@@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) ...@@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
return tb; return tb;
} }
/* caller must hold either rtnl or rcu read lock */
struct fib_table *fib_get_table(struct net *net, u32 id) struct fib_table *fib_get_table(struct net *net, u32 id)
{ {
struct fib_table *tb; struct fib_table *tb;
...@@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) ...@@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
id = RT_TABLE_MAIN; id = RT_TABLE_MAIN;
h = id & (FIB_TABLE_HASHSZ - 1); h = id & (FIB_TABLE_HASHSZ - 1);
rcu_read_lock();
head = &net->ipv4.fib_table_hash[h]; head = &net->ipv4.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb_hlist) { hlist_for_each_entry_rcu(tb, head, tb_hlist) {
if (tb->tb_id == id) { if (tb->tb_id == id)
rcu_read_unlock();
return tb; return tb;
}
} }
rcu_read_unlock();
return NULL; return NULL;
} }
#endif /* CONFIG_IP_MULTIPLE_TABLES */ #endif /* CONFIG_IP_MULTIPLE_TABLES */
...@@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, ...@@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
if (ipv4_is_multicast(addr)) if (ipv4_is_multicast(addr))
return RTN_MULTICAST; return RTN_MULTICAST;
rcu_read_lock();
local_table = fib_get_table(net, RT_TABLE_LOCAL); local_table = fib_get_table(net, RT_TABLE_LOCAL);
if (local_table) { if (local_table) {
ret = RTN_UNICAST; ret = RTN_UNICAST;
rcu_read_lock();
if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
if (!dev || dev == res.fi->fib_dev) if (!dev || dev == res.fi->fib_dev)
ret = res.type; ret = res.type;
} }
rcu_read_unlock();
} }
rcu_read_unlock();
return ret; return ret;
} }
...@@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) ...@@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
#undef BRD1_OK #undef BRD1_OK
} }
static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
{ {
struct fib_result res; struct fib_result res;
...@@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) ...@@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
.flowi4_tos = frn->fl_tos, .flowi4_tos = frn->fl_tos,
.flowi4_scope = frn->fl_scope, .flowi4_scope = frn->fl_scope,
}; };
struct fib_table *tb;
rcu_read_lock();
tb = fib_get_table(net, frn->tb_id_in);
frn->err = -ENOENT; frn->err = -ENOENT;
if (tb) { if (tb) {
...@@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) ...@@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
} }
local_bh_enable(); local_bh_enable();
} }
rcu_read_unlock();
} }
static void nl_fib_input(struct sk_buff *skb) static void nl_fib_input(struct sk_buff *skb)
...@@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) ...@@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb)
struct net *net; struct net *net;
struct fib_result_nl *frn; struct fib_result_nl *frn;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
struct fib_table *tb;
u32 portid; u32 portid;
net = sock_net(skb->sk); net = sock_net(skb->sk);
...@@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) ...@@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb)
nlh = nlmsg_hdr(skb); nlh = nlmsg_hdr(skb);
frn = (struct fib_result_nl *) nlmsg_data(nlh); frn = (struct fib_result_nl *) nlmsg_data(nlh);
tb = fib_get_table(net, frn->tb_id_in); nl_fib_lookup(net, frn);
nl_fib_lookup(frn, tb);
portid = NETLINK_CB(skb).portid; /* netlink portid */ portid = NETLINK_CB(skb).portid; /* netlink portid */
NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).portid = 0; /* from kernel */
......
...@@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, ...@@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
break; break;
case FR_ACT_UNREACHABLE: case FR_ACT_UNREACHABLE:
err = -ENETUNREACH; return -ENETUNREACH;
goto errout;
case FR_ACT_PROHIBIT: case FR_ACT_PROHIBIT:
err = -EACCES; return -EACCES;
goto errout;
case FR_ACT_BLACKHOLE: case FR_ACT_BLACKHOLE:
default: default:
err = -EINVAL; return -EINVAL;
goto errout;
} }
rcu_read_lock();
tbl = fib_get_table(rule->fr_net, rule->table); tbl = fib_get_table(rule->fr_net, rule->table);
if (!tbl) if (tbl)
goto errout; err = fib_table_lookup(tbl, &flp->u.ip4,
(struct fib_result *)arg->result,
arg->flags);
err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); rcu_read_unlock();
if (err > 0)
err = -EAGAIN;
errout:
return err; return err;
} }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment