Commit 0ed8ddf4, authored by Eric Dumazet, committed by David S. Miller

neigh: Protect neigh->ha[] with a seqlock

Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying the neighbour in stress situations (many different flows / dsts).

Dirtying takes place because of read_lock(&n->lock) and n->used writes.

Switching to a seqlock, and writing n->used only when jiffies changes,
results in less dirtying.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d122179a
...@@ -105,6 +105,7 @@ struct neighbour { ...@@ -105,6 +105,7 @@ struct neighbour {
atomic_t refcnt; atomic_t refcnt;
atomic_t probes; atomic_t probes;
rwlock_t lock; rwlock_t lock;
seqlock_t ha_lock;
unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
struct hh_cache *hh; struct hh_cache *hh;
int (*output)(struct sk_buff *skb); int (*output)(struct sk_buff *skb);
...@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) ...@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh)
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{ {
neigh->used = jiffies; unsigned long now = ACCESS_ONCE(jiffies);
if (neigh->used != now)
neigh->used = now;
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
return __neigh_event_send(neigh, skb); return __neigh_event_send(neigh, skb);
return 0; return 0;
...@@ -373,4 +377,14 @@ struct neighbour_cb { ...@@ -373,4 +377,14 @@ struct neighbour_cb {
#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
/*
 * neigh_ha_snapshot - copy a neighbour's hardware address into @dst.
 * @dst: destination buffer; receives dev->addr_len bytes, so it must be at
 *       least that large.
 * @n:   neighbour whose ha[] is read.
 * @dev: device supplying the address length (dev->addr_len).
 *
 * The copy runs inside a read_seqbegin()/read_seqretry() section on
 * n->ha_lock and is repeated for as long as read_seqretry() requests a
 * retry. This helper does not take n->lock.
 */
static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
const struct net_device *dev)
{
unsigned int seq;
do {
seq = read_seqbegin(&n->ha_lock);
/* Contents of dst are only final once the retry check below passes. */
memcpy(dst, n->ha, dev->addr_len);
} while (read_seqretry(&n->ha_lock, seq));
}
#endif #endif
...@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) ...@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
skb_queue_head_init(&n->arp_queue); skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock); rwlock_init(&n->lock);
seqlock_init(&n->ha_lock);
n->updated = n->used = now; n->updated = n->used = now;
n->nud_state = NUD_NONE; n->nud_state = NUD_NONE;
n->output = neigh_blackhole; n->output = neigh_blackhole;
...@@ -1015,7 +1016,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) ...@@ -1015,7 +1016,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
} }
EXPORT_SYMBOL(__neigh_event_send); EXPORT_SYMBOL(__neigh_event_send);
static void neigh_update_hhs(struct neighbour *neigh) static void neigh_update_hhs(const struct neighbour *neigh)
{ {
struct hh_cache *hh; struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
...@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, ...@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
} }
if (lladdr != neigh->ha) { if (lladdr != neigh->ha) {
write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len); memcpy(&neigh->ha, lladdr, dev->addr_len);
write_sequnlock(&neigh->ha_lock);
neigh_update_hhs(neigh); neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED)) if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies - neigh->confirmed = jiffies -
...@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, ...@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
{ {
struct hh_cache *hh; struct hh_cache *hh;
smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
for (hh = n->hh; hh; hh = hh->hh_next) { for (hh = n->hh; hh; hh = hh->hh_next) {
if (hh->hh_type == protocol) { if (hh->hh_type == protocol) {
atomic_inc(&hh->hh_refcnt); atomic_inc(&hh->hh_refcnt);
...@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, ...@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
kfree(hh); kfree(hh);
return; return;
} }
read_unlock(&n->lock);
write_lock(&n->lock); write_lock_bh(&n->lock);
/* must check if another thread already did the insert */ /* must check if another thread already did the insert */
if (neigh_hh_lookup(n, dst, protocol)) { if (neigh_hh_lookup(n, dst, protocol)) {
...@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, ...@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
hh->hh_output = n->ops->output; hh->hh_output = n->ops->output;
hh->hh_next = n->hh; hh->hh_next = n->hh;
smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
n->hh = hh; n->hh = hh;
if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
hh_cache_put(hh); hh_cache_put(hh);
end: end:
write_unlock(&n->lock); write_unlock_bh(&n->lock);
read_lock(&n->lock);
} }
/* This function can be used in contexts, where only old dev_queue_xmit /* This function can be used in contexts, where only old dev_queue_xmit
...@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb) ...@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) { if (!neigh_event_send(neigh, skb)) {
int err; int err;
struct net_device *dev = neigh->dev; struct net_device *dev = neigh->dev;
unsigned int seq;
read_lock_bh(&neigh->lock);
if (dev->header_ops->cache && if (dev->header_ops->cache &&
!dst->hh && !dst->hh &&
!(dst->flags & DST_NOCACHE)) !(dst->flags & DST_NOCACHE))
neigh_hh_init(neigh, dst, dst->ops->protocol); neigh_hh_init(neigh, dst, dst->ops->protocol);
err = dev_hard_header(skb, dev, ntohs(skb->protocol), do {
neigh->ha, NULL, skb->len); seq = read_seqbegin(&neigh->ha_lock);
read_unlock_bh(&neigh->lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq));
if (err >= 0) if (err >= 0)
rc = neigh->ops->queue_xmit(skb); rc = neigh->ops->queue_xmit(skb);
...@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb) ...@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb); struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh = dst->neighbour; struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev; struct net_device *dev = neigh->dev;
unsigned int seq;
__skb_pull(skb, skb_network_offset(skb)); __skb_pull(skb, skb_network_offset(skb));
read_lock_bh(&neigh->lock); do {
err = dev_hard_header(skb, dev, ntohs(skb->protocol), seq = read_seqbegin(&neigh->ha_lock);
neigh->ha, NULL, skb->len); err = dev_hard_header(skb, dev, ntohs(skb->protocol),
read_unlock_bh(&neigh->lock); neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq));
if (err >= 0) if (err >= 0)
err = neigh->ops->queue_xmit(skb); err = neigh->ops->queue_xmit(skb);
else { else {
...@@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ...@@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
read_lock_bh(&neigh->lock); read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state; ndm->ndm_state = neigh->nud_state;
if ((neigh->nud_state & NUD_VALID) && if (neigh->nud_state & NUD_VALID) {
nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { char haddr[MAX_ADDR_LEN];
read_unlock_bh(&neigh->lock);
goto nla_put_failure; neigh_ha_snapshot(haddr, neigh, neigh->dev);
if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
read_unlock_bh(&neigh->lock);
goto nla_put_failure;
}
} }
ci.ndm_used = jiffies_to_clock_t(now - neigh->used); ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
......
...@@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) ...@@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
if (n) { if (n) {
n->used = jiffies; n->used = jiffies;
if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
read_lock_bh(&n->lock); neigh_ha_snapshot(haddr, n, dev);
memcpy(haddr, n->ha, dev->addr_len);
read_unlock_bh(&n->lock);
neigh_release(n); neigh_release(n);
return 0; return 0;
} }
......
...@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * ...@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
} }
if (neigh_event_send(n, skb_res) == 0) { if (neigh_event_send(n, skb_res) == 0) {
int err; int err;
char haddr[MAX_ADDR_LEN];
read_lock(&n->lock); neigh_ha_snapshot(haddr, n, dev);
err = dev_hard_header(skb, dev, ntohs(skb->protocol), err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
n->ha, NULL, skb->len); NULL, skb->len);
read_unlock(&n->lock);
if (err < 0) { if (err < 0) {
neigh_release(n); neigh_release(n);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment