Commit eb929a91, authored by Jon Maloy, committed by David S. Miller

tipc: improve poll() for group member socket

The current criterion for returning POLLOUT from a group member socket is
too simplistic. It basically returns POLLOUT as soon as the group has
external destinations, something obviously leading to a lot of spinning
during destination congestion situations. At the same time, the internal
congestion handling is unnecessarily complex.

We now change this as follows.

- We introduce an 'open' flag in struct tipc_group. This flag is used
  only to help poll() get the setting of POLLOUT right, and *not* for
  congestion handling as such. This means that a user can choose to
  ignore an EAGAIN for a destination and go on sending messages to
  other destinations in the group if they want to.

- The flag is set to false every time we return EAGAIN on a send call.

- The flag is set to true every time any member, i.e., not necessarily
  the member that caused EAGAIN, is removed from the small_win list.

- We remove the group member 'usr_pending' flag. The size of the send
  window and presence in the 'small_win' list are sufficient criteria
  for recognizing congestion.

This solution seems to be a reasonable compromise between 'anycast',
which is normally not waiting for POLLOUT for a specific destination,
and the other three send modes, which are.
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 232d07b7
...@@ -74,7 +74,6 @@ struct tipc_member { ...@@ -74,7 +74,6 @@ struct tipc_member {
u16 bc_rcv_nxt; u16 bc_rcv_nxt;
u16 bc_syncpt; u16 bc_syncpt;
u16 bc_acked; u16 bc_acked;
bool usr_pending;
}; };
struct tipc_group { struct tipc_group {
...@@ -96,11 +95,27 @@ struct tipc_group { ...@@ -96,11 +95,27 @@ struct tipc_group {
u16 bc_ackers; u16 bc_ackers;
bool loopback; bool loopback;
bool events; bool events;
bool open;
}; };
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
int mtyp, struct sk_buff_head *xmitq); int mtyp, struct sk_buff_head *xmitq);
/* tipc_group_is_open - report the current value of the group's 'open' flag
 * @grp: group whose flag is read
 *
 * Returns grp->open unchanged. Per the commit description, the flag exists
 * only to help poll() decide whether to report POLLOUT; it is not itself
 * the congestion-control mechanism.
 */
bool tipc_group_is_open(struct tipc_group *grp)
{
return grp->open;
}
/* tipc_group_open - take a member off the small_win list and reopen its group
 * @m: member to remove from the small_win list
 * @wakeup: out parameter; set to true only if @m was actually on the list
 *
 * If @m is not linked on a small_win list this only clears *wakeup.
 * Otherwise the member is unlinked, the owning group's 'open' flag is set
 * to true, and *wakeup is set so the caller can wake up the user.
 */
static void tipc_group_open(struct tipc_member *m, bool *wakeup)
{
/* Default to "no wakeup"; overridden below only on a real state change */
*wakeup = false;
/* Member is not on the small_win list: nothing to reopen */
if (list_empty(&m->small_win))
return;
list_del_init(&m->small_win);
/* Any member leaving small_win reopens the whole group */
m->group->open = true;
*wakeup = true;
}
static void tipc_group_decr_active(struct tipc_group *grp, static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m) struct tipc_member *m)
{ {
...@@ -406,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, ...@@ -406,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
int adv, state; int adv, state;
m = tipc_group_find_dest(grp, dnode, dport); m = tipc_group_find_dest(grp, dnode, dport);
*mbr = m; if (!tipc_group_is_receiver(m)) {
if (!m) *mbr = NULL;
return false; return false;
if (m->usr_pending) }
return true; *mbr = m;
if (m->window >= len) if (m->window >= len)
return false; return false;
m->usr_pending = true;
grp->open = false;
/* If not fully advertised, do it now to prevent mutual blocking */ /* If not fully advertised, do it now to prevent mutual blocking */
adv = m->advertised; adv = m->advertised;
state = m->state; state = m->state;
if (state < MBR_JOINED)
return true;
if (state == MBR_JOINED && adv == ADV_IDLE) if (state == MBR_JOINED && adv == ADV_IDLE)
return true; return true;
if (state == MBR_ACTIVE && adv == ADV_ACTIVE) if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
...@@ -437,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) ...@@ -437,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
struct tipc_member *m = NULL; struct tipc_member *m = NULL;
/* If prev bcast was replicast, reject until all receivers have acked */ /* If prev bcast was replicast, reject until all receivers have acked */
if (grp->bc_ackers) if (grp->bc_ackers) {
grp->open = false;
return true; return true;
}
if (list_empty(&grp->small_win)) if (list_empty(&grp->small_win))
return false; return false;
...@@ -754,9 +770,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, ...@@ -754,9 +770,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
/* Member can be taken into service */ /* Member can be taken into service */
m->state = MBR_JOINED; m->state = MBR_JOINED;
*usr_wakeup = true; tipc_group_open(m, usr_wakeup);
m->usr_pending = false;
list_del_init(&m->small_win);
tipc_group_update_member(m, 0); tipc_group_update_member(m, 0);
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
tipc_group_create_event(grp, m, TIPC_PUBLISHED, tipc_group_create_event(grp, m, TIPC_PUBLISHED,
...@@ -767,8 +781,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, ...@@ -767,8 +781,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
return; return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr); m->bc_syncpt = msg_grp_bc_syncpt(hdr);
list_del_init(&m->list); list_del_init(&m->list);
list_del_init(&m->small_win); tipc_group_open(m, usr_wakeup);
*usr_wakeup = true;
tipc_group_decr_active(grp, m); tipc_group_decr_active(grp, m);
m->state = MBR_LEAVING; m->state = MBR_LEAVING;
tipc_group_create_event(grp, m, TIPC_WITHDRAWN, tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
...@@ -778,26 +791,25 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, ...@@ -778,26 +791,25 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
if (!m) if (!m)
return; return;
m->window += msg_adv_win(hdr); m->window += msg_adv_win(hdr);
*usr_wakeup = m->usr_pending; tipc_group_open(m, usr_wakeup);
m->usr_pending = false;
list_del_init(&m->small_win);
return; return;
case GRP_ACK_MSG: case GRP_ACK_MSG:
if (!m) if (!m)
return; return;
m->bc_acked = msg_grp_bc_acked(hdr); m->bc_acked = msg_grp_bc_acked(hdr);
if (--grp->bc_ackers) if (--grp->bc_ackers)
break; return;
list_del_init(&m->small_win);
m->group->open = true;
*usr_wakeup = true; *usr_wakeup = true;
m->usr_pending = false; tipc_group_update_member(m, 0);
return; return;
case GRP_RECLAIM_MSG: case GRP_RECLAIM_MSG:
if (!m) if (!m)
return; return;
*usr_wakeup = m->usr_pending;
m->usr_pending = false;
tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
m->window = ADV_IDLE; m->window = ADV_IDLE;
tipc_group_open(m, usr_wakeup);
return; return;
case GRP_REMIT_MSG: case GRP_REMIT_MSG:
if (!m || m->state != MBR_RECLAIMING) if (!m || m->state != MBR_RECLAIMING)
...@@ -883,9 +895,7 @@ void tipc_group_member_evt(struct tipc_group *grp, ...@@ -883,9 +895,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
/* Member can be taken into service */ /* Member can be taken into service */
m->instance = instance; m->instance = instance;
m->state = MBR_JOINED; m->state = MBR_JOINED;
*usr_wakeup = true; tipc_group_open(m, usr_wakeup);
m->usr_pending = false;
list_del_init(&m->small_win);
tipc_group_update_member(m, 0); tipc_group_update_member(m, 0);
tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
tipc_group_create_event(grp, m, TIPC_PUBLISHED, tipc_group_create_event(grp, m, TIPC_PUBLISHED,
...@@ -895,12 +905,10 @@ void tipc_group_member_evt(struct tipc_group *grp, ...@@ -895,12 +905,10 @@ void tipc_group_member_evt(struct tipc_group *grp,
if (!m) if (!m)
break; break;
*usr_wakeup = true;
m->usr_pending = false;
tipc_group_decr_active(grp, m); tipc_group_decr_active(grp, m);
m->state = MBR_LEAVING; m->state = MBR_LEAVING;
list_del_init(&m->list); list_del_init(&m->list);
list_del_init(&m->small_win); tipc_group_open(m, usr_wakeup);
/* Only send event if no LEAVE message can be expected */ /* Only send event if no LEAVE message can be expected */
if (!tipc_node_is_up(net, node)) if (!tipc_node_is_up(net, node))
......
...@@ -67,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); ...@@ -67,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
int len, struct tipc_member **m); int len, struct tipc_member **m);
bool tipc_group_bc_cong(struct tipc_group *grp, int len); bool tipc_group_bc_cong(struct tipc_group *grp, int len);
bool tipc_group_is_open(struct tipc_group *grp);
void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
u32 port, struct sk_buff_head *xmitq); u32 port, struct sk_buff_head *xmitq);
u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
void tipc_group_update_member(struct tipc_member *m, int len); void tipc_group_update_member(struct tipc_member *m, int len);
int tipc_group_size(struct tipc_group *grp);
#endif #endif
...@@ -715,7 +715,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, ...@@ -715,7 +715,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk); struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_group *grp = tsk->group; struct tipc_group *grp;
u32 revents = 0; u32 revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait); sock_poll_wait(file, sk_sleep(sk), wait);
...@@ -736,9 +736,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, ...@@ -736,9 +736,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
revents |= POLLIN | POLLRDNORM; revents |= POLLIN | POLLRDNORM;
break; break;
case TIPC_OPEN: case TIPC_OPEN:
if (!grp || tipc_group_size(grp)) grp = tsk->group;
if (!tsk->cong_link_cnt) if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt)
revents |= POLLOUT; revents |= POLLOUT;
if (!tipc_sk_type_connectionless(sk)) if (!tipc_sk_type_connectionless(sk))
break; break;
if (skb_queue_empty(&sk->sk_receive_queue)) if (skb_queue_empty(&sk->sk_receive_queue))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment