Commit 452a2613 authored by David S. Miller's avatar David S. Miller

Merge branch 'tls-RX'

Dave Watson says:

====================
TLS Rx

TLS tcp socket RX implementation, to match existing TX code.

This patchset completes the software TLS socket, allowing full
bi-directional communication over TLS using normal socket syscalls,
after the handshake has been done in userspace.  Only the symmetric
encryption is done in the kernel.

This allows usage of TLS sockets from within the kernel (for example
with network block device, or from bpf).  Performance can be better
than userspace, with appropriate crypto routines [1].

sk->sk_socket->ops must be overridden to implement splice_read and
poll, but otherwise the interface & implementation match TX closely.
strparser is used to parse TLS framing on receive.

There are Openssl RX patches that work with this interface [2], as
well as a testing tool using the socket interface directly (without
cmsg support) [3].  An example tcp socket setup is:

  // Normal tcp socket connect/accept, and TLS handshake
  // using any TLS library.
  setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));

  struct tls12_crypto_info_aes_gcm_128 crypto_info_rx;
  // Fill in crypto_info based on negotiated keys.

  setsockopt(sock, SOL_TLS, TLS_RX, &crypto_info, sizeof(crypto_info_rx));
  // You can optionally TLX_TX as well.

  char buffer[16384];
  int ret = recv(sock, buffer, 16384);

  // cmsg can be received using recvmsg and a msg_control
  // of type TLS_GET_RECORD_TYPE will be set.

V1 -> V2

* For too-small framing errors, return EBADMSG, to match openssl error
  code semantics.  Docs and commit logs about this also updated.

RFC -> V1

* Refactor 'tx' variable names to drop tx
* Error return codes changed per discussion
* Only call skb_cow_data based on in-place decryption,
  drop unnecessary frag list check.

[1] Recent crypto patchset to remove copies, resulting in optimally
    zero copies vs. userspace's one, vs. previous kernel's two.

https://marc.info/?l=linux-crypto-vger&m=151931242406416&w=2

[2] https://github.com/Mellanox/openssl/commits/tls_rx2

[3] https://github.com/ktls/af_ktls-tool/tree/RX
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ae06c70b b6c535b1
......@@ -48,6 +48,9 @@ the transmit and the receive into the kernel.
setsockopt(sock, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
Transmit and receive are set separately, but the setup is the same, using either
TLS_TX or TLS_RX.
Sending TLS application data
----------------------------
......@@ -79,6 +82,28 @@ for memory), or the encryption will always succeed. If send() returns
-ENOMEM and some data was left on the socket buffer from a previous
call using MSG_MORE, the MSG_MORE data is left on the socket buffer.
Receiving TLS application data
------------------------------
After setting the TLS_RX socket option, all recv family socket calls
are decrypted using TLS parameters provided. A full TLS record must
be received before decryption can happen.
char buffer[16384];
recv(sock, buffer, 16384);
Received data is decrypted directly in to the user buffer if it is
large enough, and no additional allocations occur. If the userspace
buffer is too small, data is decrypted in the kernel and copied to
userspace.
EINVAL is returned if the TLS version in the received message does not
match the version passed in setsockopt.
EMSGSIZE is returned if the received message is too big.
EBADMSG is returned if decryption failed for any other reason.
Send TLS control messages
-------------------------
......@@ -118,6 +143,43 @@ using a record of type @record_type.
Control message data should be provided unencrypted, and will be
encrypted by the kernel.
Receiving TLS control messages
------------------------------
TLS control messages are passed in the userspace buffer, with message
type passed via cmsg. If no cmsg buffer is provided, an error is
returned if a control message is received. Data messages may be
received without a cmsg buffer set.
char buffer[16384];
char cmsg[CMSG_SPACE(sizeof(unsigned char))];
struct msghdr msg = {0};
msg.msg_control = cmsg;
msg.msg_controllen = sizeof(cmsg);
struct iovec msg_iov;
msg_iov.iov_base = buffer;
msg_iov.iov_len = 16384;
msg.msg_iov = &msg_iov;
msg.msg_iovlen = 1;
int ret = recvmsg(sock, &msg, 0 /* flags */);
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
if (cmsg->cmsg_level == SOL_TLS &&
cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
int record_type = *((unsigned char *)CMSG_DATA(cmsg));
// Do something with record_type, and control message data in
// buffer.
//
// Note that record_type may be == to application data (23).
} else {
// Buffer contains application data.
}
recv will never return data from mixed types of TLS records.
Integrating in to userspace TLS library
---------------------------------------
......@@ -126,10 +188,10 @@ layer of a userspace TLS library.
A patchset to OpenSSL to use ktls as the record layer is here:
https://github.com/Mellanox/tls-openssl
https://github.com/Mellanox/openssl/commits/tls_rx2
An example of calling send directly after a handshake using
gnutls. Since it doesn't implement a full record layer, control
messages are not supported:
https://github.com/Mellanox/tls-af_ktls_tool
https://github.com/ktls/af_ktls-tool/commits/RX
......@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <net/tcp.h>
#include <net/strparser.h>
#include <uapi/linux/tls.h>
......@@ -58,8 +59,18 @@
struct tls_sw_context {
struct crypto_aead *aead_send;
struct crypto_aead *aead_recv;
struct crypto_wait async_wait;
/* Receive context */
struct strparser strp;
void (*saved_data_ready)(struct sock *sk);
unsigned int (*sk_poll)(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
struct sk_buff *recv_pkt;
u8 control;
bool decrypted;
/* Sending context */
char aad_space[TLS_AAD_SPACE_SIZE];
......@@ -81,23 +92,32 @@ enum {
TLS_PENDING_CLOSED_RECORD
};
struct cipher_context {
u16 prepend_size;
u16 tag_size;
u16 overhead_size;
u16 iv_size;
char *iv;
u16 rec_seq_size;
char *rec_seq;
};
struct tls_context {
union {
struct tls_crypto_info crypto_send;
struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
};
union {
struct tls_crypto_info crypto_recv;
struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
};
void *priv_ctx;
u8 tx_conf:2;
u8 conf:2;
u16 prepend_size;
u16 tag_size;
u16 overhead_size;
u16 iv_size;
char *iv;
u16 rec_seq_size;
char *rec_seq;
struct cipher_context tx;
struct cipher_context rx;
struct scatterlist *partially_sent_record;
u16 partially_sent_offset;
......@@ -124,12 +144,19 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
void tls_sw_free_tx_resources(struct sock *sk);
void tls_sw_free_resources(struct sock *sk);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
unsigned int tls_sw_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
void tls_icsk_clean_acked(struct sock *sk);
......@@ -170,9 +197,9 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
return tls_ctx->pending_open_record_frags;
}
static inline void tls_err_abort(struct sock *sk)
static inline void tls_err_abort(struct sock *sk, int err)
{
sk->sk_err = EBADMSG;
sk->sk_err = err;
sk->sk_error_report(sk);
}
......@@ -190,10 +217,10 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
}
static inline void tls_advance_record_sn(struct sock *sk,
struct tls_context *ctx)
struct cipher_context *ctx)
{
if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
tls_err_abort(sk);
tls_err_abort(sk, EBADMSG);
tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
ctx->iv_size);
}
......@@ -203,9 +230,9 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
size_t plaintext_len,
unsigned char record_type)
{
size_t pkt_len, iv_size = ctx->iv_size;
size_t pkt_len, iv_size = ctx->tx.iv_size;
pkt_len = plaintext_len + iv_size + ctx->tag_size;
pkt_len = plaintext_len + iv_size + ctx->tx.tag_size;
/* we cover nonce explicit here as well, so buf should be of
* size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
......@@ -217,7 +244,7 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
buf[3] = pkt_len >> 8;
buf[4] = pkt_len & 0xFF;
memcpy(buf + TLS_NONCE_OFFSET,
ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
}
static inline void tls_make_aad(char *buf,
......
......@@ -38,6 +38,7 @@
/* TLS socket options */
#define TLS_TX 1 /* Set transmit parameters */
#define TLS_RX 2 /* Set receive parameters */
/* Supported versions */
#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
......@@ -59,6 +60,7 @@
#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8
#define TLS_SET_RECORD_TYPE 1
#define TLS_GET_RECORD_TYPE 2
struct tls_crypto_info {
__u16 version;
......
......@@ -7,6 +7,7 @@ config TLS
select CRYPTO
select CRYPTO_AES
select CRYPTO_GCM
select STREAM_PARSER
default n
---help---
Enable kernel support for TLS protocol. This allows symmetric
......
......@@ -52,20 +52,23 @@ enum {
};
enum {
TLS_BASE_TX,
TLS_BASE,
TLS_SW_TX,
TLS_SW_RX,
TLS_SW_RXTX,
TLS_NUM_CONFIG,
};
static struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
static struct proto_ops tls_sw_proto_ops;
static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf];
sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
}
int wait_on_pending_writer(struct sock *sk, long *timeo)
......@@ -238,7 +241,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
lock_sock(sk);
sk_proto_close = ctx->sk_proto_close;
if (ctx->tx_conf == TLS_BASE_TX) {
if (ctx->conf == TLS_BASE) {
kfree(ctx);
goto skip_tx_cleanup;
}
......@@ -259,11 +262,16 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
}
}
kfree(ctx->rec_seq);
kfree(ctx->iv);
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
kfree(ctx->rx.rec_seq);
kfree(ctx->rx.iv);
if (ctx->tx_conf == TLS_SW_TX)
tls_sw_free_tx_resources(sk);
if (ctx->conf == TLS_SW_TX ||
ctx->conf == TLS_SW_RX ||
ctx->conf == TLS_SW_RXTX) {
tls_sw_free_resources(sk);
}
skip_tx_cleanup:
release_sock(sk);
......@@ -319,9 +327,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
}
lock_sock(sk);
memcpy(crypto_info_aes_gcm_128->iv,
ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
TLS_CIPHER_AES_GCM_128_IV_SIZE);
memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
release_sock(sk);
if (copy_to_user(optval,
......@@ -365,20 +373,24 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
return do_tls_getsockopt(sk, optname, optval, optlen);
}
static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
unsigned int optlen)
static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
unsigned int optlen, int tx)
{
struct tls_crypto_info *crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
int rc = 0;
int tx_conf;
int conf;
if (!optval || (optlen < sizeof(*crypto_info))) {
rc = -EINVAL;
goto out;
}
if (tx)
crypto_info = &ctx->crypto_send;
else
crypto_info = &ctx->crypto_recv;
/* Currently we don't support set crypto info more than one time */
if (TLS_CRYPTO_INFO_READY(crypto_info)) {
rc = -EBUSY;
......@@ -417,15 +429,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
/* currently SW is default, we will have ethtool in future */
rc = tls_set_sw_offload(sk, ctx);
tx_conf = TLS_SW_TX;
if (tx) {
rc = tls_set_sw_offload(sk, ctx, 1);
if (ctx->conf == TLS_SW_RX)
conf = TLS_SW_RXTX;
else
conf = TLS_SW_TX;
} else {
rc = tls_set_sw_offload(sk, ctx, 0);
if (ctx->conf == TLS_SW_TX)
conf = TLS_SW_RXTX;
else
conf = TLS_SW_RX;
}
if (rc)
goto err_crypto_info;
ctx->tx_conf = tx_conf;
ctx->conf = conf;
update_sk_prot(sk, ctx);
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
sk->sk_write_space = tls_write_space;
} else {
sk->sk_socket->ops = &tls_sw_proto_ops;
}
goto out;
err_crypto_info:
......@@ -441,8 +469,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
switch (optname) {
case TLS_TX:
case TLS_RX:
lock_sock(sk);
rc = do_tls_setsockopt_tx(sk, optval, optlen);
rc = do_tls_setsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
release_sock(sk);
break;
default:
......@@ -465,14 +495,22 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
static void build_protos(struct proto *prot, struct proto *base)
{
prot[TLS_BASE_TX] = *base;
prot[TLS_BASE_TX].setsockopt = tls_setsockopt;
prot[TLS_BASE_TX].getsockopt = tls_getsockopt;
prot[TLS_BASE_TX].close = tls_sk_proto_close;
prot[TLS_BASE] = *base;
prot[TLS_BASE].setsockopt = tls_setsockopt;
prot[TLS_BASE].getsockopt = tls_getsockopt;
prot[TLS_BASE].close = tls_sk_proto_close;
prot[TLS_SW_TX] = prot[TLS_BASE_TX];
prot[TLS_SW_TX] = prot[TLS_BASE];
prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg;
prot[TLS_SW_TX].sendpage = tls_sw_sendpage;
prot[TLS_SW_RX] = prot[TLS_BASE];
prot[TLS_SW_RX].recvmsg = tls_sw_recvmsg;
prot[TLS_SW_RX].close = tls_sk_proto_close;
prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
prot[TLS_SW_RXTX].recvmsg = tls_sw_recvmsg;
prot[TLS_SW_RXTX].close = tls_sk_proto_close;
}
static int tls_init(struct sock *sk)
......@@ -513,7 +551,7 @@ static int tls_init(struct sock *sk)
mutex_unlock(&tcpv6_prot_mutex);
}
ctx->tx_conf = TLS_BASE_TX;
ctx->conf = TLS_BASE;
update_sk_prot(sk, ctx);
out:
return rc;
......@@ -531,6 +569,10 @@ static int __init tls_register(void)
{
build_protos(tls_prots[TLSV4], &tcp_prot);
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.poll = tls_sw_poll;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment