Commit 35fc07ae authored by David S. Miller's avatar David S. Miller

Merge branch 'tcp-add-three-static-keys'

Eric Dumazet says:

====================
tcp: add three static keys

Recent addition of per TCP socket rx/tx cache brought
regressions for some workloads, as reported by Feng Tang.

It seems better to make them opt-in, before we adopt better
heuristics.

The last patch adds high_order_alloc_disable sysctl
to ask TCP sendmsg() to exclusively use order-0 allocations,
as mm layer has specific optimizations.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 9a33629b ce27ec60
......@@ -772,6 +772,14 @@ tcp_challenge_ack_limit - INTEGER
in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
Default: 100
tcp_rx_skb_cache - BOOLEAN
Controls a per TCP socket cache of one skb, that might help
performance of some workloads. This might be dangerous
on systems with a lot of TCP sockets, since it increases
memory usage.
Default: 0 (disabled)
UDP variables:
udp_l3mdev_accept - BOOLEAN
......
......@@ -600,7 +600,6 @@ void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
extern int sysctl_unprivileged_bpf_disabled;
extern int sysctl_bpf_stats_enabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
......
......@@ -63,6 +63,9 @@ extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
void __user *, size_t *, loff_t *);
extern int proc_do_large_bitmap(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_do_static_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
/*
* Register a set of sysctl names by calling register_sysctl_table
......
......@@ -1463,12 +1463,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
__sk_mem_reclaim(sk, 1 << 20);
}
DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
skb_zcopy_clear(skb, true);
sk->sk_tx_skb_cache = skb;
return;
......@@ -2433,13 +2435,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
if (
#ifdef CONFIG_RPS
!static_branch_unlikely(&rps_needed) &&
#endif
if (static_branch_unlikely(&tcp_rx_skb_cache_key) &&
!sk->sk_rx_skb_cache) {
sk->sk_rx_skb_cache = skb;
skb_orphan(skb);
......@@ -2534,6 +2534,8 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
......
......@@ -2097,7 +2097,6 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
int sysctl_bpf_stats_enabled __read_mostly;
/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
......
......@@ -230,11 +230,6 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_BPF_SYSCALL
static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
......@@ -1253,12 +1248,10 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "bpf_stats_enabled",
.data = &sysctl_bpf_stats_enabled,
.maxlen = sizeof(sysctl_bpf_stats_enabled),
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_bpf_stats,
.extra1 = &zero,
.extra2 = &one,
.proc_handler = proc_do_static_key,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
......@@ -3374,26 +3367,35 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_SYSCTL */
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
#if defined(CONFIG_SYSCTL)
int proc_do_static_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret, bpf_stats = *(int *)table->data;
struct ctl_table tmp = *table;
struct static_key *key = (struct static_key *)table->data;
static DEFINE_MUTEX(static_key_mutex);
int val, ret;
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
.extra1 = &zero,
.extra2 = &one,
};
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
tmp.data = &bpf_stats;
mutex_lock(&static_key_mutex);
val = static_key_enabled(key);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
*(int *)table->data = bpf_stats;
if (bpf_stats)
static_branch_enable(&bpf_stats_enabled_key);
if (val)
static_key_enable(key);
else
static_branch_disable(&bpf_stats_enabled_key);
static_key_disable(key);
}
mutex_unlock(&static_key_mutex);
return ret;
}
#endif
......
......@@ -2320,6 +2320,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
* skb_page_frag_refill - check that a page_frag contains enough room
......@@ -2344,7 +2345,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
pfrag->offset = 0;
if (SKB_FRAG_PAGE_ORDER) {
if (SKB_FRAG_PAGE_ORDER &&
!static_branch_unlikely(&net_high_order_alloc_disable_key)) {
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_NOWARN |
......
......@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
.extra1 = &zero,
.extra2 = &two,
},
{
.procname = "high_order_alloc_disable",
.data = &net_high_order_alloc_disable_key.key,
.maxlen = sizeof(net_high_order_alloc_disable_key),
.mode = 0644,
.proc_handler = proc_do_static_key,
},
{ }
};
......
......@@ -51,6 +51,11 @@ static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
EXPORT_SYMBOL(tcp_rx_skb_cache_key);
DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
......@@ -559,6 +564,18 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
{
.procname = "tcp_rx_skb_cache",
.data = &tcp_rx_skb_cache_key.key,
.mode = 0644,
.proc_handler = proc_do_static_key,
},
{
.procname = "tcp_tx_skb_cache",
.data = &tcp_tx_skb_cache_key.key,
.mode = 0644,
.proc_handler = proc_do_static_key,
},
{ }
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment