Commit 4c94cc2d authored by Jon Maloy, committed by David S. Miller

tipc: fall back to smaller MTU if allocation of local send skb fails

When sending node-local messages the code uses an 'mtu' of 66060
bytes to avoid unnecessary fragmentation. In low-memory situations
tipc_msg_build() may sometimes fail to allocate such large
buffers, resulting in unnecessary send failures. This can easily be
remedied by falling back to a smaller MTU and then reassembling the
buffer chain as if the message were arriving from a remote node.

At the same time, we change the initial MTU setting of the broadcast
link to a lower value, so that large messages are always fragmented
into smaller buffers even when we run in single-node mode. Apart from
obtaining the same advantage as the 'fallback' solution above, this
turns out to give a significant performance improvement. This can
probably be explained by the __pskb_copy() operation performed on the
buffer for each recipient during reception. We found the optimal value
for this, considering the most relevant skb pool, to be 3744 bytes.
Acked-by: Ying Xue <ying.xue@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 201c78e0
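The fallback idea from the commit message is easy to demonstrate outside the kernel. The following is a minimal user-space sketch, not TIPC code: plain malloc()/memcpy() stand in for skb handling, the helper names are invented, and real fragments would of course carry TIPC fragment headers. It cuts an oversized payload into FB_MTU-sized pieces and stitches them back together exactly as if they had arrived from a remote node:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FB_MTU 3744                    /* fallback fragment size from the patch */

/* Cut 'len' bytes of 'msg' into FB_MTU-sized pieces; returns the count. */
static size_t fragment(const char *msg, size_t len, char **frags)
{
        size_t n = 0, pos = 0;

        while (pos < len) {
                size_t chunk = len - pos < FB_MTU ? len - pos : FB_MTU;

                frags[n] = malloc(chunk);
                assert(frags[n]);
                memcpy(frags[n], msg + pos, chunk);
                pos += chunk;
                n++;
        }
        return n;
}

/* Stitch the pieces back into one buffer, consuming them, just as the
 * receive path would do for fragments arriving from a remote node. */
static char *assemble(char **frags, size_t n, size_t len)
{
        char *msg = malloc(len);
        size_t i, pos = 0;

        assert(msg);
        for (i = 0; i < n; i++) {
                size_t chunk = len - pos < FB_MTU ? len - pos : FB_MTU;

                memcpy(msg + pos, frags[i], chunk);
                pos += chunk;
                free(frags[i]);
        }
        return msg;
}

int main(void)
{
        size_t len = 66000, i;          /* a TIPC_MAX_USER_MSG_SIZE payload */
        char *msg = malloc(len), *copy, *frags[32];

        for (i = 0; i < len; i++)
                msg[i] = (char)i;
        copy = assemble(frags, fragment(msg, len, frags), len);
        assert(!memcmp(msg, copy, len));
        printf("round-tripped %zu bytes via FB_MTU fragments\n", len);
        free(msg);
        free(copy);
        return 0;
}

In the patch below, the cutting is done by the recursive tipc_msg_build() call at the smaller MTU, and the stitching by the new tipc_msg_assemble().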
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/bcast.c: TIPC broadcast code
  *
- * Copyright (c) 2004-2006, 2014-2016, Ericsson AB
+ * Copyright (c) 2004-2006, 2014-2017, Ericsson AB
  * Copyright (c) 2004, Intel Corporation.
  * Copyright (c) 2005, 2010-2011, Wind River Systems
  * All rights reserved.
@@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net)
 	return tipc_net(net)->bcbase;
 }
 
+/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link
+ * Note: the MTU is decremented to give room for a tunnel header, in
+ * case the message needs to be sent as replicast
+ */
 int tipc_bcast_get_mtu(struct net *net)
 {
 	return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
@@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net)
 	spin_lock_init(&tipc_net(net)->bclock);
 
 	if (!tipc_link_bc_create(net, 0, 0,
-				 U16_MAX,
+				 FB_MTU,
 				 BCLINK_WIN_DEFAULT,
 				 0,
 				 &bb->inputq,
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -483,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 /**
  * tipc_link_bc_create - create new link to be used for broadcast
  * @n: pointer to associated node
- * @mtu: mtu to be used
+ * @mtu: mtu to be used initially if no peers
  * @window: send window to be used
  * @inputq: queue to put messages ready for delivery
  * @namedq: queue to put binding table update messages ready for delivery
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -251,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff **_skb)
  * @pktmax: Max packet size that can be used
  * @list: Buffer or chain of buffers to be returned to caller
  *
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
  * Returns message data size or errno: -ENOMEM, -EFAULT
  */
-int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
-		   int offset, int dsz, int pktmax, struct sk_buff_head *list)
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+		   int dsz, int pktmax, struct sk_buff_head *list)
 {
 	int mhsz = msg_hdr_sz(mhdr);
+	struct tipc_msg pkthdr;
 	int msz = mhsz + dsz;
-	int pktno = 1;
-	int pktsz;
 	int pktrem = pktmax;
-	int drem = dsz;
-	struct tipc_msg pkthdr;
 	struct sk_buff *skb;
+	int drem = dsz;
+	int pktno = 1;
 	char *pktpos;
+	int pktsz;
 	int rc;
 
 	msg_set_size(mhdr, msz);
@@ -272,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 	/* No fragmentation needed? */
 	if (likely(msz <= pktmax)) {
 		skb = tipc_buf_acquire(msz, GFP_KERNEL);
-		if (unlikely(!skb))
+
+		/* Fall back to smaller MTU if node local message */
+		if (unlikely(!skb)) {
+			if (pktmax != MAX_MSG_SIZE)
+				return -ENOMEM;
+			rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+			if (rc != dsz)
+				return rc;
+			if (tipc_msg_assemble(list))
+				return dsz;
 			return -ENOMEM;
+		}
 		skb_orphan(skb);
 		__skb_queue_tail(list, skb);
 		skb_copy_to_linear_data(skb, mhdr, mhsz);
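The "recursive call ... is safe" note added above can be spelled out: only node-local sends invoke tipc_msg_build() with pktmax == MAX_MSG_SIZE (the 66060-byte 'mtu' from the commit message), and the fallback call passes FB_MTU, so if allocation fails again the pktmax != MAX_MSG_SIZE guard returns -ENOMEM instead of recursing further. A compilable toy model of just that guard (stand-in names, not kernel code; try_alloc() arbitrarily fails anything larger than a page to mimic memory pressure):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_MSG_SIZE 66060      /* only node-local sends use this pktmax */
#define FB_MTU 3744

/* Stand-in for tipc_buf_acquire(): large allocations always fail here,
 * modelling memory pressure. */
static void *try_alloc(int size)
{
        return size > 4096 ? NULL : malloc(size);
}

/* Mirrors only the guard logic of tipc_msg_build(): the recursive call
 * passes FB_MTU, so a second allocation failure trips the
 * pktmax != MAX_MSG_SIZE test and returns -ENOMEM instead of recursing. */
static int build(int pktmax)
{
        void *buf = try_alloc(pktmax);

        if (!buf) {
                if (pktmax != MAX_MSG_SIZE)
                        return -ENOMEM;         /* already the fallback pass */
                return build(FB_MTU);           /* at most one level deeper */
        }
        free(buf);
        return 0;
}

int main(void)
{
        printf("build(MAX_MSG_SIZE) = %d\n", build(MAX_MSG_SIZE));
        return 0;
}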
@@ -589,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	return true;
 }
 
+/* tipc_msg_assemble() - assemble chain of fragments into one message
+ */
+bool tipc_msg_assemble(struct sk_buff_head *list)
+{
+	struct sk_buff *skb, *tmp = NULL;
+
+	if (skb_queue_len(list) == 1)
+		return true;
+
+	while ((skb = __skb_dequeue(list))) {
+		skb->next = NULL;
+		if (tipc_buf_append(&tmp, &skb)) {
+			__skb_queue_tail(list, skb);
+			return true;
+		}
+		if (!tmp)
+			break;
+	}
+	__skb_queue_purge(list);
+	__skb_queue_head_init(list);
+	pr_warn("Failed to assemble buffer\n");
+	return false;
+}
+
 /* tipc_msg_reassemble() - clone a buffer chain of fragments and
  * reassemble the clones into one message
  */
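One detail of the loop above is worth a note: tipc_buf_append() consumes the buffers it is given. An intermediate fragment is absorbed into the accumulator ('tmp'), the final call hands the complete message back through 'skb' so it can be re-queued, and on error both pointers are dropped, which is why a NULL 'tmp' after a false return signals failure rather than "more fragments expected". Below is a small user-space model of that consume-on-append contract (all names invented; unlike the real code, which reads the total message size from the first fragment's header, the total is passed in explicitly here):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Accumulator for a message of known total size. */
struct accum {
        char *buf;
        size_t got, total;
};

/* Consume one fragment: absorb it into *acc, and when the message is
 * complete hand the finished buffer back through *data (mirroring how
 * tipc_buf_append() returns the head skb through its second argument).
 * On error both the fragment and the accumulator are released, so
 * acc->buf == NULL afterwards means failure, not "more to come". */
static bool append(struct accum *acc, char **data, size_t len, size_t total)
{
        if (!acc->buf) {
                acc->buf = malloc(total);
                if (!acc->buf) {
                        free(*data);
                        *data = NULL;
                        return false;
                }
                acc->total = total;
                acc->got = 0;
        }
        memcpy(acc->buf + acc->got, *data, len);
        acc->got += len;
        free(*data);
        if (acc->got < acc->total) {
                *data = NULL;           /* consumed, message incomplete */
                return false;
        }
        *data = acc->buf;               /* complete: hand message back */
        acc->buf = NULL;
        return true;
}

int main(void)
{
        struct accum acc = { NULL, 0, 0 };
        const char *pieces[] = { "frag", "ment", "ed!" };
        char *out = NULL;
        size_t i;

        for (i = 0; i < 3; i++) {
                char *d = strdup(pieces[i]);

                if (append(&acc, &d, strlen(pieces[i]), 11)) {
                        out = d;
                        break;
                }
                if (!acc.buf)           /* allocation failed mid-stream */
                        return 1;
        }
        printf("%.11s\n", out);         /* prints "fragmented!" */
        free(out);
        return 0;
}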
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -98,7 +98,7 @@ struct plist;
 #define MAX_H_SIZE                60	/* Largest possible TIPC header size */
 
 #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-
+#define FB_MTU                  3744
 
 #define TIPC_MEDIA_INFO_OFFSET	5
 
 struct tipc_skb_cb {
@@ -943,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
 int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
 		   int offset, int dsz, int mtu, struct sk_buff_head *list);
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
+bool tipc_msg_assemble(struct sk_buff_head *list);
 bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
 bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
 			struct sk_buff_head *cpy);