Commit 40501f90, authored by Jon Paul Maloy, committed by David S. Miller

tipc: don't reset stale broadcast send link

When the broadcast send link has failed, after 100 attempts, to
transfer a packet to all peers, we consider it stale and reset
it. Thereafter it needs to re-synchronize with the peers, which is
currently done by simply resetting and re-establishing all links to
all peers. This has turned out to be overkill, with potentially
unwanted consequences for the rest of the cluster.

A closer analysis reveals that this can be done much more simply. When
this kind of failure happens, for reasons that may lie outside the
TIPC protocol, it is typically only one peer that is failing to
receive and acknowledge packets. It is hence sufficient to identify
that peer and reset only the links to it to resolve the situation, without
having to reset the broadcast link at all. This solution entails a much
lower risk of negative consequences for the local node as well as for
the overall cluster.

We implement this change in this commit.
Reviewed-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e65a4955
...@@ -365,30 +365,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) ...@@ -365,30 +365,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
return 0; return 0;
} }
/* tipc_bearer_reset_all - reset all links on all bearers
 *
 * @net: namespace whose bearer slots (0 .. MAX_BEARERS - 1) are walked
 *
 * Works in three separate passes over the bearer table, so that bit 0 of
 * ->up is cleared on every existing bearer before any bearer is reset, and
 * is only set again once every bearer has been reset. Slots for which
 * bearer_get() returns NULL are skipped in each pass.
 *
 * NOTE(review): bit 0 of ->up presumably marks the bearer as usable for
 * traffic; confirm against the bearer send/receive paths.
*/
void tipc_bearer_reset_all(struct net *net)
{
struct tipc_bearer *b;
int i;
/* Pass 1: clear bit 0 of ->up on every existing bearer */
for (i = 0; i < MAX_BEARERS; i++) {
b = bearer_get(net, i);
if (b)
clear_bit_unlock(0, &b->up);
}
/* Pass 2: reset each existing bearer; the return value is ignored */
for (i = 0; i < MAX_BEARERS; i++) {
b = bearer_get(net, i);
if (b)
tipc_reset_bearer(net, b);
}
/* Pass 3: set bit 0 of ->up again; the previous bit value is discarded */
for (i = 0; i < MAX_BEARERS; i++) {
b = bearer_get(net, i);
if (b)
test_and_set_bit_lock(0, &b->up);
}
}
/** /**
* bearer_disable * bearer_disable
* *
......
...@@ -210,7 +210,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); ...@@ -210,7 +210,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
struct tipc_media *tipc_media_find(const char *name); struct tipc_media *tipc_media_find(const char *name);
void tipc_bearer_reset_all(struct net *net);
int tipc_bearer_setup(void); int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void); void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net); void tipc_bearer_stop(struct net *net);
......
...@@ -978,15 +978,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) ...@@ -978,15 +978,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
struct tipc_msg *hdr = buf_msg(skb); struct tipc_msg *hdr = buf_msg(skb);
pr_warn("Retransmission failure on link <%s>\n", l->name); pr_warn("Retransmission failure on link <%s>\n", l->name);
link_print(l, "Resetting link "); link_print(l, "State of link ");
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
pr_info("sqno %u, prev: %x, src: %x\n", pr_info("sqno %u, prev: %x, src: %x\n",
msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
} }
int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
struct sk_buff_head *xmitq) u16 from, u16 to, struct sk_buff_head *xmitq)
{ {
struct sk_buff *_skb, *skb = skb_peek(&l->transmq); struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr; struct tipc_msg *hdr;
...@@ -997,11 +997,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, ...@@ -997,11 +997,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
return 0; return 0;
/* Detect repeated retransmit failures on same packet */ /* Detect repeated retransmit failures on same packet */
if (likely(l->last_retransm != buf_seqno(skb))) { if (nacker->last_retransm != buf_seqno(skb)) {
l->last_retransm = buf_seqno(skb); nacker->last_retransm = buf_seqno(skb);
l->stale_count = 1; nacker->stale_count = 1;
} else if (++l->stale_count > 100) { } else if (++nacker->stale_count > 100) {
link_retransmit_failure(l, skb); link_retransmit_failure(l, skb);
nacker->stale_count = 0;
if (link_is_bc_sndlink(l))
return TIPC_LINK_DOWN_EVT;
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
} }
...@@ -1528,7 +1531,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, ...@@ -1528,7 +1531,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* If NACK, retransmit will now start at right position */ /* If NACK, retransmit will now start at right position */
if (gap) { if (gap) {
rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq); rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
l->stats.recv_nacks++; l->stats.recv_nacks++;
} }
...@@ -1680,7 +1683,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, ...@@ -1680,7 +1683,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
return rc; return rc;
if (link_bc_retr_eval(snd_l, &from, &to)) if (link_bc_retr_eval(snd_l, &from, &to))
rc = tipc_link_retrans(snd_l, from, to, xmitq); rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt; l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l)) if (link_bc_rcv_gap(l))
...@@ -1775,7 +1778,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, ...@@ -1775,7 +1778,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
if (dnode == tipc_own_addr(l->net)) { if (dnode == tipc_own_addr(l->net)) {
tipc_link_bc_ack_rcv(l, acked, xmitq); tipc_link_bc_ack_rcv(l, acked, xmitq);
rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
l->stats.recv_nacks++; l->stats.recv_nacks++;
return rc; return rc;
} }
......
...@@ -1284,7 +1284,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, ...@@ -1284,7 +1284,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
if (rc & TIPC_LINK_DOWN_EVT) { if (rc & TIPC_LINK_DOWN_EVT) {
tipc_bearer_reset_all(n->net); tipc_node_reset_links(n);
return; return;
} }
...@@ -1351,15 +1351,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id ...@@ -1351,15 +1351,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
if (!skb_queue_empty(&be->inputq1)) if (!skb_queue_empty(&be->inputq1))
tipc_node_mcast_rcv(n); tipc_node_mcast_rcv(n);
if (rc & TIPC_LINK_DOWN_EVT) { /* If reassembly or retransmission failure => reset all links to peer */
/* Reception reassembly failure => reset all links to peer */ if (rc & TIPC_LINK_DOWN_EVT)
if (!tipc_link_is_up(be->link)) tipc_node_reset_links(n);
tipc_node_reset_links(n);
/* Retransmission failure => reset all links to all peers */
if (!tipc_link_is_up(tipc_bc_sndlink(net)))
tipc_bearer_reset_all(net);
}
tipc_node_put(n); tipc_node_put(n);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment