Commit cc79dd1b authored by Ying Xue, committed by David S. Miller

tipc: change socket buffer overflow control to respect sk_rcvbuf

As per feedback from the netdev community, we change the buffer
overflow protection algorithm in receiving sockets so that it
always respects the nominal upper limit set in sk_rcvbuf.

Instead of scaling up from a small sk_rcvbuf value, which leads to
violation of the configured sk_rcvbuf limit, we now calculate the
weighted per-message limit by scaling down from a much bigger value,
still in the same field, according to the importance priority of the
received message.

To allow for administrative tunability of the socket receive buffer
size, we create a tipc_rmem sysctl variable so that the user can
configure an even bigger value via the sysctl command.  It is a vector
of three values (min/default/max), consistent with things like tcp_rmem.

By default, the value initialized in tipc_rmem[1] is equal to the
receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message.
This value is also set as the default value of sk_rcvbuf.
Originally-by: Jon Maloy <jon.maloy@ericsson.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
[Ying: added sysctl variation to Jon's original patch]
Signed-off-by: Ying Xue <ying.xue@windriver.com>
[PG: don't compile sysctl.c if not config'd; add Documentation]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 8941bbcd
...@@ -26,7 +26,7 @@ Table : Subdirectories in /proc/sys/net ...@@ -26,7 +26,7 @@ Table : Subdirectories in /proc/sys/net
ipv4 IP version 4 x25 X.25 protocol ipv4 IP version 4 x25 X.25 protocol
ipx IPX token-ring IBM token ring ipx IPX token-ring IBM token ring
bridge Bridging decnet DEC net bridge Bridging decnet DEC net
ipv6 IP version 6 ipv6 IP version 6 tipc TIPC
.............................................................................. ..............................................................................
1. /proc/sys/net/core - Network core options 1. /proc/sys/net/core - Network core options
...@@ -207,3 +207,18 @@ IPX. ...@@ -207,3 +207,18 @@ IPX.
The /proc/net/ipx_route table holds a list of IPX routes. For each route it The /proc/net/ipx_route table holds a list of IPX routes. For each route it
gives the destination network, the router node (or Directly) and the network gives the destination network, the router node (or Directly) and the network
address of the router (or Connected) for internal networks. address of the router (or Connected) for internal networks.
6. TIPC
-------------------------------------------------------
The TIPC protocol now has a tunable for the receive memory, similar to the
tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
# cat /proc/sys/net/tipc/tipc_rmem
4252725 34021800 68043600
#
The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
are scaled (shifted) versions of that same value. Note that the min value
is not currently used in any meaningful way, but the triplet is
preserved in order to be consistent with things like tcp_rmem.
...@@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \ ...@@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \
socket.o log.o eth_media.o socket.o log.o eth_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "name_table.h" #include "name_table.h"
#include "subscr.h" #include "subscr.h"
#include "config.h" #include "config.h"
#include "port.h"
#include <linux/module.h> #include <linux/module.h>
...@@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; ...@@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly;
int tipc_max_ports __read_mostly; int tipc_max_ports __read_mostly;
int tipc_net_id __read_mostly; int tipc_net_id __read_mostly;
int tipc_remote_management __read_mostly; int tipc_remote_management __read_mostly;
int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
/** /**
* tipc_buf_acquire - creates a TIPC message buffer * tipc_buf_acquire - creates a TIPC message buffer
...@@ -118,6 +119,7 @@ static void tipc_core_stop(void) ...@@ -118,6 +119,7 @@ static void tipc_core_stop(void)
tipc_nametbl_stop(); tipc_nametbl_stop();
tipc_ref_table_stop(); tipc_ref_table_stop();
tipc_socket_stop(); tipc_socket_stop();
tipc_unregister_sysctl();
} }
/** /**
...@@ -142,13 +144,14 @@ static int tipc_core_start(void) ...@@ -142,13 +144,14 @@ static int tipc_core_start(void)
res = tipc_netlink_start(); res = tipc_netlink_start();
if (!res) if (!res)
res = tipc_socket_init(); res = tipc_socket_init();
if (!res)
res = tipc_register_sysctl();
if (res) if (res)
tipc_core_stop(); tipc_core_stop();
return res; return res;
} }
static int __init tipc_init(void) static int __init tipc_init(void)
{ {
int res; int res;
...@@ -160,6 +163,11 @@ static int __init tipc_init(void) ...@@ -160,6 +163,11 @@ static int __init tipc_init(void)
tipc_max_ports = CONFIG_TIPC_PORTS; tipc_max_ports = CONFIG_TIPC_PORTS;
tipc_net_id = 4711; tipc_net_id = 4711;
sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE;
sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 <<
TIPC_CRITICAL_IMPORTANCE;
sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT;
res = tipc_core_start(); res = tipc_core_start();
if (res) if (res)
pr_err("Unable to start in single node mode\n"); pr_err("Unable to start in single node mode\n");
......
...@@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; ...@@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly;
extern int tipc_max_ports __read_mostly; extern int tipc_max_ports __read_mostly;
extern int tipc_net_id __read_mostly; extern int tipc_net_id __read_mostly;
extern int tipc_remote_management __read_mostly; extern int tipc_remote_management __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
/* /*
* Other global variables * Other global variables
...@@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void); ...@@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void);
extern int tipc_socket_init(void); extern int tipc_socket_init(void);
extern void tipc_socket_stop(void); extern void tipc_socket_stop(void);
#ifdef CONFIG_SYSCTL
extern int tipc_register_sysctl(void);
extern void tipc_unregister_sysctl(void);
#else
#define tipc_register_sysctl() 0
#define tipc_unregister_sysctl()
#endif
/* /*
* TIPC timer and signal code * TIPC timer and signal code
*/ */
......
...@@ -43,6 +43,8 @@ ...@@ -43,6 +43,8 @@
#include "node_subscr.h" #include "node_subscr.h"
#define TIPC_FLOW_CONTROL_WIN 512 #define TIPC_FLOW_CONTROL_WIN 512
#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref,
struct sk_buff **buf, unsigned char const *data, struct sk_buff **buf, unsigned char const *data,
......
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
#define SS_LISTENING -1 /* socket is listening */ #define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */ #define SS_READY -2 /* socket is connectionless */
#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
struct tipc_sock { struct tipc_sock {
...@@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, ...@@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk); sock_init_data(sock, sk);
sk->sk_backlog_rcv = backlog_rcv; sk->sk_backlog_rcv = backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready; sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space; sk->sk_write_space = tipc_write_space;
tipc_sk(sk)->p = tp_ptr; tipc_sk(sk)->p = tp_ptr;
...@@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) ...@@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
* For all connectionless messages, by default new queue limits are * For all connectionless messages, by default new queue limits are
* as belows: * as belows:
* *
* TIPC_LOW_IMPORTANCE (5MB) * TIPC_LOW_IMPORTANCE (4 MB)
* TIPC_MEDIUM_IMPORTANCE (10MB) * TIPC_MEDIUM_IMPORTANCE (8 MB)
* TIPC_HIGH_IMPORTANCE (20MB) * TIPC_HIGH_IMPORTANCE (16 MB)
* TIPC_CRITICAL_IMPORTANCE (40MB) * TIPC_CRITICAL_IMPORTANCE (32 MB)
* *
* Returns overload limit according to corresponding message importance * Returns overload limit according to corresponding message importance
*/ */
...@@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) ...@@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
unsigned int limit; unsigned int limit;
if (msg_connected(msg)) if (msg_connected(msg))
limit = CONN_OVERLOAD_LIMIT; limit = sysctl_tipc_rmem[2];
else else
limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
msg_importance(msg);
return limit; return limit;
} }
...@@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = { ...@@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = {
static struct proto tipc_proto = { static struct proto tipc_proto = {
.name = "TIPC", .name = "TIPC",
.owner = THIS_MODULE, .owner = THIS_MODULE,
.obj_size = sizeof(struct tipc_sock) .obj_size = sizeof(struct tipc_sock),
.sysctl_rmem = sysctl_tipc_rmem
}; };
/** /**
......
/*
* net/tipc/sysctl.c: sysctl interface to TIPC subsystem
*
* Copyright (c) 2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include <linux/sysctl.h>
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
{
.procname = "tipc_rmem",
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{}
};
/**
 * tipc_register_sysctl - publish the TIPC sysctl table under net/tipc
 *
 * Registers tipc_table in init_net and stores the resulting header in
 * tipc_ctl_hdr so that tipc_unregister_sysctl() can hand it back later.
 *
 * Returns 0 on success, or -ENOMEM if no header was returned.
 */
int tipc_register_sysctl(void)
{
	struct ctl_table_header *hdr;

	hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table);
	tipc_ctl_hdr = hdr;

	return hdr ? 0 : -ENOMEM;
}
/**
 * tipc_unregister_sysctl - withdraw the TIPC sysctl table
 *
 * Passes the header saved in tipc_ctl_hdr by tipc_register_sysctl() to
 * unregister_net_sysctl_table().
 */
void tipc_unregister_sysctl(void)
{
	struct ctl_table_header *hdr = tipc_ctl_hdr;

	unregister_net_sysctl_table(hdr);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment