Commit fd18d5f1 authored by David S. Miller

Merge branch 'tls-rx-avoid-skb_cow_data'

Jakub Kicinski says:

====================
tls: rx: avoid skb_cow_data()

TLS calls skb_cow_data() on the skb it received from strparser
whenever it needs to hold onto the skb with the decrypted data.
(The alternative is decrypting directly into a user space buffer,
in which case the input skb doesn't get modified or used after.)
TLS needs the decrypted skb:
 - almost always with TLS 1.3 (unless the new NoPad is enabled);
 - when user space buffer is too small to fit the record;
 - when BPF sockmap is enabled.

Most of the time the skb we get out of strparser is a clone of
a 64kB data unit coalesced by GRO. To make things worse, skb_cow_data()
tries to output a linear skb and allocates it with GFP_ATOMIC.
This occasionally fails even under moderate memory pressure.

This patch set rejigs the TLS Rx path so that we don't expect decryption
in place. The decryption handlers return an skb which may or may not
be the skb from strparser. For TLS 1.3 this results in a 20-30%
performance improvement without NoPad enabled.

v2: rebase after 3d8c51b2 ("net/tls: Check for errors in tls_device_init")
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3898f52c fd31f399
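
For orientation before the diff: a minimal sketch of the flow the cover letter
describes, with hypothetical helper names (example_decrypt_to_user,
example_decrypt_to_skb) that do not correspond to functions in this series.
The point is only the shape: the decrypt path hands back whichever skb ends up
holding the plaintext, instead of skb_cow_data()-ing the strparser clone and
decrypting in place.

/* Illustrative sketch only -- not code from this series. */
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/sock.h>

/* Hypothetical helpers assumed to exist for the sake of the sketch. */
int example_decrypt_to_user(struct sock *sk, struct sk_buff *src);
int example_decrypt_to_skb(struct sock *sk, struct sk_buff *src,
			   struct sk_buff *dst);

static struct sk_buff *example_decrypt_rx_record(struct sock *sk,
						 struct sk_buff *strp_skb,
						 bool zero_copy)
{
	struct sk_buff *dst;
	int err;

	if (zero_copy) {
		/* Plaintext goes straight to the user buffer; the strparser
		 * skb is neither modified nor used afterwards.
		 */
		err = example_decrypt_to_user(sk, strp_skb);
		return err ? ERR_PTR(err) : strp_skb;
	}

	/* Decrypt into a freshly allocated skb instead of calling
	 * skb_cow_data() on a (possibly 64kB, GRO-coalesced) clone
	 * with GFP_ATOMIC.
	 */
	dst = alloc_skb(strp_skb->len, sk->sk_allocation);
	if (!dst)
		return ERR_PTR(-ENOMEM);

	err = example_decrypt_to_skb(sk, strp_skb, dst);
	if (err) {
		kfree_skb(dst);
		return ERR_PTR(err);
	}
	return dst;	/* may or may not be the skb from strparser */
}
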
@@ -72,7 +72,6 @@ struct sk_skb_cb {
 	/* strp users' data follows */
 	struct tls_msg {
 		u8 control;
-		u8 decrypted;
 	} tls;
 	/* temp_reg is a temporary register used for bpf_convert_data_end_access
 	 * when dst_reg == src_reg.
...
@@ -116,11 +116,15 @@ struct tls_sw_context_rx {
 	void (*saved_data_ready)(struct sock *sk);
 
 	struct sk_buff *recv_pkt;
+	u8 reader_present;
 	u8 async_capable:1;
 	u8 zc_capable:1;
+	u8 reader_contended:1;
 	atomic_t decrypt_pending;
 	/* protect crypto_wait with decrypt_pending*/
 	spinlock_t decrypt_compl_lock;
+	struct sk_buff_head async_hold;
+	struct wait_queue_head wq;
 };
 
 struct tls_record_info {
...
@@ -7,7 +7,7 @@ CFLAGS_trace.o := -I$(src)
 
 obj-$(CONFIG_TLS) += tls.o
 
-tls-y := tls_main.o tls_sw.o tls_proc.o trace.o
+tls-y := tls_main.o tls_sw.o tls_proc.o trace.o tls_strp.o
 
 tls-$(CONFIG_TLS_TOE) += tls_toe.o
 tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
@@ -39,6 +39,9 @@
 #include <linux/skmsg.h>
 #include <net/tls.h>
 
+#define TLS_PAGE_ORDER	(min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER,	\
+			       TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
+
 #define __TLS_INC_STATS(net, field)				\
 	__SNMP_INC_STATS((net)->mib.tls_statistics, field)
 #define TLS_INC_STATS(net, field)				\
@@ -118,13 +121,15 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
 int tls_process_cmsg(struct sock *sk, struct msghdr *msg,
 		     unsigned char *record_type);
 
-int decrypt_skb(struct sock *sk, struct sk_buff *skb,
-		struct scatterlist *sgout);
+int decrypt_skb(struct sock *sk, struct scatterlist *sgout);
 
 int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
+int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb,
+		      struct sk_buff_head *dst);
+
 static inline struct tls_msg *tls_msg(struct sk_buff *skb)
 {
 	struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb;
@@ -132,6 +137,11 @@ static inline struct tls_msg *tls_msg(struct sk_buff *skb)
 	return &scb->tls;
 }
 
+static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx)
+{
+	return ctx->recv_pkt;
+}
+
 #ifdef CONFIG_TLS_DEVICE
 int tls_device_init(void);
 void tls_device_cleanup(void);
@@ -140,8 +150,7 @@ void tls_device_free_resources_tx(struct sock *sk);
 int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
 void tls_device_offload_cleanup_rx(struct sock *sk);
 void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
-int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
-			 struct sk_buff *skb, struct strp_msg *rxm);
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx);
 #else
 static inline int tls_device_init(void) { return 0; }
 static inline void tls_device_cleanup(void) {}
@@ -165,8 +174,7 @@ static inline void
 tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
 
 static inline int
-tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
-		     struct sk_buff *skb, struct strp_msg *rxm)
+tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
 {
 	return 0;
 }
...
@@ -889,14 +889,19 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
 	}
 }
 
-static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
+static int
+tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx)
 {
-	struct strp_msg *rxm = strp_msg(skb);
-	int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
-	struct sk_buff *skb_iter, *unused;
+	int err = 0, offset, copy, nsg, data_len, pos;
+	struct sk_buff *skb, *skb_iter, *unused;
 	struct scatterlist sg[1];
+	struct strp_msg *rxm;
 	char *orig_buf, *buf;
 
+	skb = tls_strp_msg(sw_ctx);
+	rxm = strp_msg(skb);
+	offset = rxm->offset;
+
 	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
 			   TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
 	if (!orig_buf)
@@ -919,7 +924,7 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
 		goto free_buf;
 
 	/* We are interested only in the decrypted data not the auth */
-	err = decrypt_skb(sk, skb, sg);
+	err = decrypt_skb(sk, sg);
 	if (err != -EBADMSG)
 		goto free_buf;
 	else
@@ -974,10 +979,12 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
 	return err;
 }
 
-int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
-			 struct sk_buff *skb, struct strp_msg *rxm)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
 {
 	struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+	struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_buff *skb = tls_strp_msg(sw_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
 	int is_decrypted = skb->decrypted;
 	int is_encrypted = !is_decrypted;
 	struct sk_buff *skb_iter;
@@ -1000,7 +1007,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
 	 * likely have initial fragments decrypted, and final ones not
 	 * decrypted. We need to reencrypt that single SKB.
 	 */
-	return tls_device_reencrypt(sk, skb);
+	return tls_device_reencrypt(sk, sw_ctx);
 }
 
 /* Return immediately if the record is either entirely plaintext or
@@ -1017,7 +1024,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
 	}
 
 	ctx->resync_nh_reset = 1;
-	return tls_device_reencrypt(sk, skb);
+	return tls_device_reencrypt(sk, sw_ctx);
 }
 
 static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
...
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/skbuff.h>

#include "tls.h"

int tls_strp_msg_hold(struct sock *sk, struct sk_buff *skb,
		      struct sk_buff_head *dst)
{
	struct sk_buff *clone;

	clone = skb_clone(skb, sk->sk_allocation);
	if (!clone)
		return -ENOMEM;
	__skb_queue_tail(dst, clone);
	return 0;
}
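
As a usage note, a hypothetical caller sketch (not part of this commit,
assuming the declarations added to tls.h above): the helper pairs with the new
async_hold queue in tls_sw_context_rx, letting the Rx path clone-and-queue the
strparser record before an asynchronous decrypt so the data stays valid until
the crypto completion runs.

/* Hypothetical caller sketch -- not from this commit. */
#include "tls.h"

static int example_hold_for_async(struct sock *sk,
				  struct tls_sw_context_rx *ctx)
{
	int err;

	/* Clone the current strparser message onto the hold queue so it
	 * outlives strparser's ownership of the original skb.
	 */
	err = tls_strp_msg_hold(sk, tls_strp_msg(ctx), &ctx->async_hold);
	if (err)
		return err;	/* -ENOMEM: fall back to a synchronous decrypt */

	/* ... submit the AEAD request asynchronously here ... */
	return 0;
}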