Commit 9990a79d authored by David S. Miller's avatar David S. Miller

Merge branch 'net-tls-add-a-TX-lock'

Jakub Kicinski says:

====================
net/tls: add a TX lock

Some time ago Pooja and Mallesham started reporting crashes with
an async accelerator. After trying to poke the existing logic into
shape I came to the conclusion that it can't be trusted, and to
preserve our sanity we should just add a lock around the TX side.

First patch removes the sk_write_pending checks from the write
space callbacks. Those don't seem to have a logical justification.

Patch 2 adds the TX lock and patch 3 associated test (which should
hang with current net).

Mallesham reports that even with these fixes applied the async
accelerator workload still occasionally hangs waiting for socket
memory. I suspect that's strictly related to the way async crypto
is integrated in TLS, so I think we should get these into net or
net-next and move from there.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 17fdd763 41098af5
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <linux/socket.h> #include <linux/socket.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/skmsg.h> #include <linux/skmsg.h>
#include <linux/mutex.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
...@@ -269,6 +270,10 @@ struct tls_context { ...@@ -269,6 +270,10 @@ struct tls_context {
bool in_tcp_sendpages; bool in_tcp_sendpages;
bool pending_open_record_frags; bool pending_open_record_frags;
struct mutex tx_lock; /* protects partially_sent_* fields and
* per-type TX fields
*/
unsigned long flags; unsigned long flags;
/* cache cold stuff */ /* cache cold stuff */
......
...@@ -523,8 +523,10 @@ static int tls_push_data(struct sock *sk, ...@@ -523,8 +523,10 @@ static int tls_push_data(struct sock *sk,
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{ {
unsigned char record_type = TLS_RECORD_TYPE_DATA; unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
int rc; int rc;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk); lock_sock(sk);
if (unlikely(msg->msg_controllen)) { if (unlikely(msg->msg_controllen)) {
...@@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
out: out:
release_sock(sk); release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return rc; return rc;
} }
int tls_device_sendpage(struct sock *sk, struct page *page, int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags) int offset, size_t size, int flags)
{ {
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct iov_iter msg_iter; struct iov_iter msg_iter;
char *kaddr = kmap(page); char *kaddr = kmap(page);
struct kvec iov; struct kvec iov;
...@@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, ...@@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
if (flags & MSG_SENDPAGE_NOTLAST) if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE; flags |= MSG_MORE;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk); lock_sock(sk);
if (flags & MSG_OOB) { if (flags & MSG_OOB) {
...@@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, ...@@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
out: out:
release_sock(sk); release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return rc; return rc;
} }
...@@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) ...@@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
void tls_device_write_space(struct sock *sk, struct tls_context *ctx) void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
{ {
if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) { if (tls_is_partially_sent_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation; gfp_t sk_allocation = sk->sk_allocation;
WARN_ON_ONCE(sk->sk_write_pending);
sk->sk_allocation = GFP_ATOMIC; sk->sk_allocation = GFP_ATOMIC;
tls_push_partial_record(sk, ctx, tls_push_partial_record(sk, ctx,
MSG_DONTWAIT | MSG_NOSIGNAL | MSG_DONTWAIT | MSG_NOSIGNAL |
......
...@@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx) ...@@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
mutex_destroy(&ctx->tx_lock);
if (sk) if (sk)
kfree_rcu(ctx, rcu); kfree_rcu(ctx, rcu);
...@@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk) ...@@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
if (!ctx) if (!ctx)
return NULL; return NULL;
mutex_init(&ctx->tx_lock);
rcu_assign_pointer(icsk->icsk_ulp_data, ctx); rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = sk->sk_prot; ctx->sk_proto = sk->sk_prot;
return ctx; return ctx;
......
...@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -ENOTSUPP; return -ENOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk); lock_sock(sk);
/* Wait till there is any pending write on socket */
if (unlikely(sk->sk_write_pending)) {
ret = wait_on_pending_writer(sk, &timeo);
if (unlikely(ret))
goto send_end;
}
if (unlikely(msg->msg_controllen)) { if (unlikely(msg->msg_controllen)) {
ret = tls_proccess_cmsg(sk, msg, &record_type); ret = tls_proccess_cmsg(sk, msg, &record_type);
if (ret) { if (ret) {
...@@ -1091,6 +1085,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -1091,6 +1085,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
ret = sk_stream_error(sk, msg->msg_flags, ret); ret = sk_stream_error(sk, msg->msg_flags, ret);
release_sock(sk); release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return copied ? copied : ret; return copied ? copied : ret;
} }
...@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, ...@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/* Wait till there is any pending write on socket */
if (unlikely(sk->sk_write_pending)) {
ret = wait_on_pending_writer(sk, &timeo);
if (unlikely(ret))
goto sendpage_end;
}
/* Call the sk_stream functions to manage the sndbuf mem. */ /* Call the sk_stream functions to manage the sndbuf mem. */
while (size > 0) { while (size > 0) {
size_t copy, required_size; size_t copy, required_size;
...@@ -1219,15 +1207,18 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, ...@@ -1219,15 +1207,18 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
int tls_sw_sendpage(struct sock *sk, struct page *page, int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags) int offset, size_t size, int flags)
{ {
struct tls_context *tls_ctx = tls_get_ctx(sk);
int ret; int ret;
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -ENOTSUPP; return -ENOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk); lock_sock(sk);
ret = tls_sw_do_sendpage(sk, page, offset, size, flags); ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
release_sock(sk); release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
return ret; return ret;
} }
...@@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work) ...@@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work)
if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
return; return;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk); lock_sock(sk);
tls_tx_records(sk, -1); tls_tx_records(sk, -1);
release_sock(sk); release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
} }
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
...@@ -2180,12 +2173,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) ...@@ -2180,12 +2173,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
/* Schedule the transmission if tx list is ready */ /* Schedule the transmission if tx list is ready */
if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) { if (is_tx_ready(tx_ctx) &&
/* Schedule the transmission */ !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
if (!test_and_set_bit(BIT_TX_SCHEDULED, schedule_delayed_work(&tx_ctx->tx_work.work, 0);
&tx_ctx->tx_bitmask))
schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
} }
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
......
...@@ -898,6 +898,114 @@ TEST_F(tls, nonblocking) ...@@ -898,6 +898,114 @@ TEST_F(tls, nonblocking)
} }
} }
static void
test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
bool sendpg, unsigned int n_readers, unsigned int n_writers)
{
const unsigned int n_children = n_readers + n_writers;
const size_t data = 6 * 1000 * 1000;
const size_t file_sz = data / 100;
size_t read_bias, write_bias;
int i, fd, child_id;
char buf[file_sz];
pid_t pid;
/* Only allow multiples for simplicity */
ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
read_bias = n_writers / n_readers ?: 1;
write_bias = n_readers / n_writers ?: 1;
/* prep a file to send */
fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
ASSERT_GE(fd, 0);
memset(buf, 0xac, file_sz);
ASSERT_EQ(write(fd, buf, file_sz), file_sz);
/* spawn children */
for (child_id = 0; child_id < n_children; child_id++) {
pid = fork();
ASSERT_NE(pid, -1);
if (!pid)
break;
}
/* parent waits for all children */
if (pid) {
for (i = 0; i < n_children; i++) {
int status;
wait(&status);
EXPECT_EQ(status, 0);
}
return;
}
/* Split threads for reading and writing */
if (child_id < n_readers) {
size_t left = data * read_bias;
char rb[8001];
while (left) {
int res;
res = recv(self->cfd, rb,
left > sizeof(rb) ? sizeof(rb) : left, 0);
EXPECT_GE(res, 0);
left -= res;
}
} else {
size_t left = data * write_bias;
while (left) {
int res;
ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
if (sendpg)
res = sendfile(self->fd, fd, NULL,
left > file_sz ? file_sz : left);
else
res = send(self->fd, buf,
left > file_sz ? file_sz : left, 0);
EXPECT_GE(res, 0);
left -= res;
}
}
}
TEST_F(tls, mutliproc_even)
{
test_mutliproc(_metadata, self, false, 6, 6);
}
TEST_F(tls, mutliproc_readers)
{
test_mutliproc(_metadata, self, false, 4, 12);
}
TEST_F(tls, mutliproc_writers)
{
test_mutliproc(_metadata, self, false, 10, 2);
}
TEST_F(tls, mutliproc_sendpage_even)
{
test_mutliproc(_metadata, self, true, 6, 6);
}
TEST_F(tls, mutliproc_sendpage_readers)
{
test_mutliproc(_metadata, self, true, 4, 12);
}
TEST_F(tls, mutliproc_sendpage_writers)
{
test_mutliproc(_metadata, self, true, 10, 2);
}
TEST_F(tls, control_msg) TEST_F(tls, control_msg)
{ {
if (self->notls) if (self->notls)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment