Commit f6b72b62 authored by David S. Miller

net: Embed hh_cache inside of struct neighbour.

Now that there is a one-to-one correspondence between neighbour
and hh_cache entries, we no longer need:

1) dynamic allocation
2) attachment to dst->hh
3) refcounting

Initialization of the hh_cache entry is indicated by hh_len
being non-zero, and such initialization is always done with
the neighbour's lock held as a writer.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 390fd0b3
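
In practical terms, the diff below converges on two patterns: initialize the embedded hh_cache at most once under the neighbour's write lock, and have readers test hh_len instead of chasing a dst->hh pointer. A minimal sketch of both, assuming kernel context; the example_* helper names are illustrative and not part of the patch:

/* Initialization side: done at most once per neighbour, with the
 * neighbour's lock held as a writer; a non-zero hh_len marks the
 * embedded entry as valid.  (Illustrative reduction of the patched
 * neigh_hh_init() below.)
 */
static void example_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct hh_cache *hh = &n->hh;	/* embedded, never kmalloc'd */

	write_lock_bh(&n->lock);
	if (!hh->hh_len &&
	    dst->dev->header_ops->cache(n, hh, dst->ops->protocol) == 0)
		hh->hh_output = (n->nud_state & NUD_CONNECTED) ?
				n->ops->hh_output : n->ops->output;
	write_unlock_bh(&n->lock);
}

/* Reader side: test hh_len instead of a dst->hh pointer, so no
 * refcounting and no attachment to the dst is needed.  (Illustrative
 * reduction of the patched ip_finish_output2()/ip6_finish_output2().)
 */
static int example_output(struct dst_entry *dst, struct sk_buff *skb)
{
	struct neighbour *neigh = dst->neighbour;

	if (neigh) {
		struct hh_cache *hh = &neigh->hh;

		if (hh->hh_len)
			return neigh_hh_output(hh, skb);
		return neigh->output(skb);
	}
	kfree_skb(skb);
	return -EINVAL;
}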
@@ -252,14 +252,7 @@ struct netdev_hw_addr_list {
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
 struct hh_cache {
-	atomic_t	hh_refcnt;	/* number of users */
-/*
- * We want hh_output, hh_len, hh_lock and hh_data be a in a separate
- * cache line on SMP.
- * They are mostly read, but hh_refcnt may be changed quite frequently,
- * incurring cache line ping pongs.
- */
-	u16		hh_len ____cacheline_aligned_in_smp;
+	u16		hh_len;
 	u16		__pad;
 	int		(*hh_output)(struct sk_buff *skb);
 	seqlock_t	hh_lock;
@@ -273,12 +266,6 @@ struct hh_cache {
 	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
 };
 
-static inline void hh_cache_put(struct hh_cache *hh)
-{
-	if (atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
-}
-
 /* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
  * Alternative is:
  *   dev->hard_header_len ? (dev->hard_header_len +
...
@@ -38,7 +38,6 @@ struct dst_entry {
 	unsigned long		expires;
 	struct dst_entry	*path;
 	struct neighbour	*neighbour;
-	struct hh_cache		*hh;
 #ifdef CONFIG_XFRM
 	struct xfrm_state	*xfrm;
 #else
@@ -47,6 +46,14 @@ struct dst_entry {
 	int			(*input)(struct sk_buff*);
 	int			(*output)(struct sk_buff*);
 
+	int			flags;
+#define DST_HOST		0x0001
+#define DST_NOXFRM		0x0002
+#define DST_NOPOLICY		0x0004
+#define DST_NOHASH		0x0008
+#define DST_NOCACHE		0x0010
+#define DST_NOCOUNT		0x0020
+
 	short			error;
 	short			obsolete;
 	unsigned short		header_len;	/* more space at head required */
@@ -62,7 +69,7 @@ struct dst_entry {
 	 * (L1_CACHE_SIZE would be too much)
 	 */
 #ifdef CONFIG_64BIT
-	long			__pad_to_align_refcnt[1];
+	long			__pad_to_align_refcnt[2];
 #endif
 	/*
 	 * __refcnt wants to be on a different cache line from
@@ -71,13 +78,6 @@ struct dst_entry {
 	atomic_t		__refcnt;	/* client references */
 	int			__use;
 	unsigned long		lastuse;
-	int			flags;
-#define DST_HOST		0x0001
-#define DST_NOXFRM		0x0002
-#define DST_NOPOLICY		0x0004
-#define DST_NOHASH		0x0008
-#define DST_NOCACHE		0x0010
-#define DST_NOCOUNT		0x0020
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
...
@@ -108,7 +108,7 @@ struct neighbour {
 	__u8			dead;
 	seqlock_t		ha_lock;
 	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
-	struct hh_cache		*hh;
+	struct hh_cache		hh;
 	int			(*output)(struct sk_buff *skb);
 	const struct neigh_ops	*ops;
 	struct rcu_head		rcu;
...
@@ -343,14 +343,16 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct neighbour *neigh;
 	struct dst_entry *dst;
 
 	skb->dev = bridge_parent(skb->dev);
 	if (!skb->dev)
 		goto free_skb;
 	dst = skb_dst(skb);
-	if (dst->hh) {
-		neigh_hh_bridge(dst->hh, skb);
+	neigh = dst->neighbour;
+	if (neigh->hh.hh_len) {
+		neigh_hh_bridge(&neigh->hh, skb);
 		skb->dev = nf_bridge->physindev;
 		return br_handle_frame_finish(skb);
 	} else if (dst->neighbour) {
...
@@ -172,7 +172,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->expires = 0UL;
 	dst->path = dst;
 	dst->neighbour = NULL;
-	dst->hh = NULL;
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -226,19 +225,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
 	struct dst_entry *child;
 	struct neighbour *neigh;
-	struct hh_cache *hh;
 
 	smp_rmb();
 
 again:
 	neigh = dst->neighbour;
-	hh = dst->hh;
 	child = dst->child;
 
-	dst->hh = NULL;
-	if (hh)
-		hh_cache_put(hh);
-
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
...
@@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
+	seqlock_init(&n->hh.hh_lock);
 	n->parms	  = neigh_parms_clone(&tbl->parms);
 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
@@ -702,14 +703,11 @@ void neigh_destroy(struct neighbour *neigh)
 	if (neigh_del_timer(neigh))
 		printk(KERN_WARNING "Impossible event.\n");
 
-	hh = neigh->hh;
-	if (hh) {
-		neigh->hh = NULL;
-
+	hh = &neigh->hh;
+	if (hh->hh_len) {
 		write_seqlock_bh(&hh->hh_lock);
 		hh->hh_output = neigh_blackhole;
 		write_sequnlock_bh(&hh->hh_lock);
-		hh_cache_put(hh);
 	}
 
 	skb_queue_purge(&neigh->arp_queue);
@@ -737,8 +735,8 @@ static void neigh_suspect(struct neighbour *neigh)
 	neigh->output = neigh->ops->output;
 
-	hh = neigh->hh;
-	if (hh)
+	hh = &neigh->hh;
+	if (hh->hh_len)
 		hh->hh_output = neigh->ops->output;
 }
@@ -755,8 +753,8 @@ static void neigh_connect(struct neighbour *neigh)
 	neigh->output = neigh->ops->connected_output;
 
-	hh = neigh->hh;
-	if (hh)
+	hh = &neigh->hh;
+	if (hh->hh_len)
 		hh->hh_output = neigh->ops->hh_output;
 }
@@ -1017,7 +1015,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(const struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1027,8 +1025,8 @@ static void neigh_update_hhs(const struct neighbour *neigh)
 		update = neigh->dev->header_ops->cache_update;
 
 	if (update) {
-		hh = neigh->hh;
-		if (hh) {
+		hh = &neigh->hh;
+		if (hh->hh_len) {
 			write_seqlock_bh(&hh->hh_lock);
 			update(hh, neigh->dev, neigh->ha);
 			write_sequnlock_bh(&hh->hh_lock);
@@ -1214,62 +1212,29 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 }
 EXPORT_SYMBOL(neigh_event_ns);
 
-static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst)
-{
-	struct hh_cache *hh;
-
-	smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
-	hh = n->hh;
-	if (hh) {
-		atomic_inc(&hh->hh_refcnt);
-		if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-			hh_cache_put(hh);
-		return true;
-	}
-	return false;
-}
-
 /* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
-			  __be16 protocol)
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
 {
-	struct hh_cache *hh;
 	struct net_device *dev = dst->dev;
-
-	if (likely(neigh_hh_lookup(n, dst)))
-		return;
-
-	/* slow path */
-	hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
-	if (!hh)
-		return;
-
-	seqlock_init(&hh->hh_lock);
-	atomic_set(&hh->hh_refcnt, 2);
-
-	if (dev->header_ops->cache(n, hh, protocol)) {
-		kfree(hh);
-		return;
-	}
+	__be16 prot = dst->ops->protocol;
+	struct hh_cache *hh = &n->hh;
 
 	write_lock_bh(&n->lock);
 
-	/* must check if another thread already did the insert */
-	if (neigh_hh_lookup(n, dst)) {
-		kfree(hh);
+	/* Only one thread can come in here and initialize the
+	 * hh_cache entry.
+	 */
+	if (hh->hh_len)
+		goto end;
+
+	if (dev->header_ops->cache(n, hh, prot))
 		goto end;
-	}
 
 	if (n->nud_state & NUD_CONNECTED)
 		hh->hh_output = n->ops->hh_output;
 	else
 		hh->hh_output = n->ops->output;
 
-	smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
-	n->hh = hh;
-
-	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
-		hh_cache_put(hh);
 end:
 	write_unlock_bh(&n->lock);
 }
@@ -1312,10 +1277,8 @@ int neigh_resolve_output(struct sk_buff *skb)
 		struct net_device *dev = neigh->dev;
 		unsigned int seq;
 
-		if (dev->header_ops->cache &&
-		    !dst->hh &&
-		    !(dst->flags & DST_NOCACHE))
-			neigh_hh_init(neigh, dst, dst->ops->protocol);
+		if (dev->header_ops->cache && !neigh->hh.hh_len)
+			neigh_hh_init(neigh, dst);
 
 		do {
 			seq = read_seqbegin(&neigh->ha_lock);
...
@@ -182,6 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
+	struct neighbour *neigh;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,11 +204,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
+	neigh = dst->neighbour;
+	if (neigh) {
+		struct hh_cache *hh = &neigh->hh;
+		if (hh->hh_len)
+			return neigh_hh_output(hh, skb);
+		else
+			return dst->neighbour->output(skb);
+	}
 
 	if (net_ratelimit())
 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
 	kfree_skb(skb);
...
@@ -426,9 +426,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 		    (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 		     dst_metric(&r->dst, RTAX_RTTVAR)),
 		    r->rt_key_tos,
-		    r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
-		    r->dst.hh ? (r->dst.hh->hh_output ==
-				 dev_queue_xmit) : 0,
+		    -1,
+		    (r->dst.neighbour ?
+		     (r->dst.neighbour->hh.hh_output ==
+		      dev_queue_xmit) : 0),
 		    r->rt_spec_dst, &len);
 
 		seq_printf(seq, "%*s\n", 127 - len, "");
...
@@ -100,6 +100,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
+	struct neighbour *neigh;
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
@@ -134,11 +135,14 @@ static int ip6_finish_output2(struct sk_buff *skb)
 				skb->len);
 	}
 
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
+	neigh = dst->neighbour;
+	if (neigh) {
+		struct hh_cache *hh = &neigh->hh;
+		if (hh->hh_len)
+			return neigh_hh_output(hh, skb);
+		else
+			return dst->neighbour->output(skb);
+	}
 
 	IP6_INC_STATS_BH(dev_net(dst->dev),
 			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
...