Commit fcb30073 authored by Jakub Kicinski

Merge branch 'wireguard-fixes-for-5-12-rc1'

Jason Donenfeld says:

====================
wireguard fixes for 5.12-rc1

This series has a collection of fixes that have piled up for a little
while now, that I unfortunately didn't get a chance to send out earlier.

1) Removes unlikely() from IS_ERR(), since it's already implied.

2) Remove a bogus sparse annotation that hasn't been needed for years.

3) Additional test in the test suite for stressing parallel ndo_start_xmit.

4) Slight struct reordering in preparation for subsequent fix.

5) If skb->protocol is bogus, we no longer attempt to send icmp messages.

6) Massive memory usage fix, hit by larger deployments.

7) Fix typo in kconfig dependency logic.

(1) and (2) are tiny cleanups, and (3) is just a test, so if you're
trying to reduce churn, you could not backport these. But (4), (5), (6),
and (7) fix problems and should be applied to stable. IMO, it's probably
easiest to just apply them all to stable.
====================

Link: https://lore.kernel.org/r/20210222162549.3252778-1-Jason@zx2c4.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents fc0494ea bce24739
...@@ -88,7 +88,7 @@ config WIREGUARD ...@@ -88,7 +88,7 @@ config WIREGUARD
select CRYPTO_CURVE25519_X86 if X86 && 64BIT select CRYPTO_CURVE25519_X86 if X86 && 64BIT
select ARM_CRYPTO if ARM select ARM_CRYPTO if ARM
select ARM64_CRYPTO if ARM64 select ARM64_CRYPTO if ARM64
select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON select CRYPTO_CHACHA20_NEON if ARM || (ARM64 && KERNEL_MODE_NEON)
select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
select CRYPTO_POLY1305_ARM if ARM select CRYPTO_POLY1305_ARM if ARM
select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
......
...@@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
else if (skb->protocol == htons(ETH_P_IPV6)) else if (skb->protocol == htons(ETH_P_IPV6))
net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
dev->name, &ipv6_hdr(skb)->daddr); dev->name, &ipv6_hdr(skb)->daddr);
goto err; goto err_icmp;
} }
family = READ_ONCE(peer->endpoint.addr.sa_family); family = READ_ONCE(peer->endpoint.addr.sa_family);
...@@ -157,7 +157,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -157,7 +157,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
} else { } else {
struct sk_buff *segs = skb_gso_segment(skb, 0); struct sk_buff *segs = skb_gso_segment(skb, 0);
if (unlikely(IS_ERR(segs))) { if (IS_ERR(segs)) {
ret = PTR_ERR(segs); ret = PTR_ERR(segs);
goto err_peer; goto err_peer;
} }
...@@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
err_peer: err_peer:
wg_peer_put(peer); wg_peer_put(peer);
err: err_icmp:
++dev->stats.tx_errors;
if (skb->protocol == htons(ETH_P_IP)) if (skb->protocol == htons(ETH_P_IP))
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
else if (skb->protocol == htons(ETH_P_IPV6)) else if (skb->protocol == htons(ETH_P_IPV6))
icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
err:
++dev->stats.tx_errors;
kfree_skb(skb); kfree_skb(skb);
return ret; return ret;
} }
...@@ -234,8 +235,8 @@ static void wg_destruct(struct net_device *dev) ...@@ -234,8 +235,8 @@ static void wg_destruct(struct net_device *dev)
destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_receive_wq);
destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->handshake_send_wq);
destroy_workqueue(wg->packet_crypt_wq); destroy_workqueue(wg->packet_crypt_wq);
wg_packet_queue_free(&wg->decrypt_queue, true); wg_packet_queue_free(&wg->decrypt_queue);
wg_packet_queue_free(&wg->encrypt_queue, true); wg_packet_queue_free(&wg->encrypt_queue);
rcu_barrier(); /* Wait for all the peers to be actually freed. */ rcu_barrier(); /* Wait for all the peers to be actually freed. */
wg_ratelimiter_uninit(); wg_ratelimiter_uninit();
memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
...@@ -337,12 +338,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, ...@@ -337,12 +338,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
goto err_destroy_handshake_send; goto err_destroy_handshake_send;
ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
true, MAX_QUEUED_PACKETS); MAX_QUEUED_PACKETS);
if (ret < 0) if (ret < 0)
goto err_destroy_packet_crypt; goto err_destroy_packet_crypt;
ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
true, MAX_QUEUED_PACKETS); MAX_QUEUED_PACKETS);
if (ret < 0) if (ret < 0)
goto err_free_encrypt_queue; goto err_free_encrypt_queue;
...@@ -367,9 +368,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, ...@@ -367,9 +368,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
err_uninit_ratelimiter: err_uninit_ratelimiter:
wg_ratelimiter_uninit(); wg_ratelimiter_uninit();
err_free_decrypt_queue: err_free_decrypt_queue:
wg_packet_queue_free(&wg->decrypt_queue, true); wg_packet_queue_free(&wg->decrypt_queue);
err_free_encrypt_queue: err_free_encrypt_queue:
wg_packet_queue_free(&wg->encrypt_queue, true); wg_packet_queue_free(&wg->encrypt_queue);
err_destroy_packet_crypt: err_destroy_packet_crypt:
destroy_workqueue(wg->packet_crypt_wq); destroy_workqueue(wg->packet_crypt_wq);
err_destroy_handshake_send: err_destroy_handshake_send:
......
...@@ -27,13 +27,14 @@ struct multicore_worker { ...@@ -27,13 +27,14 @@ struct multicore_worker {
struct crypt_queue { struct crypt_queue {
struct ptr_ring ring; struct ptr_ring ring;
union { struct multicore_worker __percpu *worker;
struct { int last_cpu;
struct multicore_worker __percpu *worker; };
int last_cpu;
}; struct prev_queue {
struct work_struct work; struct sk_buff *head, *tail, *peeked;
}; struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff.
atomic_t count;
}; };
struct wg_device { struct wg_device {
......
...@@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, ...@@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
peer = kzalloc(sizeof(*peer), GFP_KERNEL); peer = kzalloc(sizeof(*peer), GFP_KERNEL);
if (unlikely(!peer)) if (unlikely(!peer))
return ERR_PTR(ret); return ERR_PTR(ret);
peer->device = wg; if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err;
peer->device = wg;
wg_noise_handshake_init(&peer->handshake, &wg->static_identity, wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
public_key, preshared_key, peer); public_key, preshared_key, peer);
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err_1;
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
MAX_QUEUED_PACKETS))
goto err_2;
if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
MAX_QUEUED_PACKETS))
goto err_3;
peer->internal_id = atomic64_inc_return(&peer_counter); peer->internal_id = atomic64_inc_return(&peer_counter);
peer->serial_work_cpu = nr_cpumask_bits; peer->serial_work_cpu = nr_cpumask_bits;
wg_cookie_init(&peer->latest_cookie); wg_cookie_init(&peer->latest_cookie);
wg_timers_init(peer); wg_timers_init(peer);
wg_cookie_checker_precompute_peer_keys(peer); wg_cookie_checker_precompute_peer_keys(peer);
spin_lock_init(&peer->keypairs.keypair_update_lock); spin_lock_init(&peer->keypairs.keypair_update_lock);
INIT_WORK(&peer->transmit_handshake_work, INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker);
wg_packet_handshake_send_worker); INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker);
wg_prev_queue_init(&peer->tx_queue);
wg_prev_queue_init(&peer->rx_queue);
rwlock_init(&peer->endpoint_lock); rwlock_init(&peer->endpoint_lock);
kref_init(&peer->refcount); kref_init(&peer->refcount);
skb_queue_head_init(&peer->staged_packet_queue); skb_queue_head_init(&peer->staged_packet_queue);
...@@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, ...@@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
return peer; return peer;
err_3: err:
wg_packet_queue_free(&peer->tx_queue, false);
err_2:
dst_cache_destroy(&peer->endpoint_cache);
err_1:
kfree(peer); kfree(peer);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
...@@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head *rcu) ...@@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head *rcu)
struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
dst_cache_destroy(&peer->endpoint_cache); dst_cache_destroy(&peer->endpoint_cache);
wg_packet_queue_free(&peer->rx_queue, false); WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue));
wg_packet_queue_free(&peer->tx_queue, false);
/* The final zeroing takes care of clearing any remaining handshake key /* The final zeroing takes care of clearing any remaining handshake key
* material and other potentially sensitive information. * material and other potentially sensitive information.
......
...@@ -36,16 +36,17 @@ struct endpoint { ...@@ -36,16 +36,17 @@ struct endpoint {
struct wg_peer { struct wg_peer {
struct wg_device *device; struct wg_device *device;
struct crypt_queue tx_queue, rx_queue; struct prev_queue tx_queue, rx_queue;
struct sk_buff_head staged_packet_queue; struct sk_buff_head staged_packet_queue;
int serial_work_cpu; int serial_work_cpu;
bool is_dead;
struct noise_keypairs keypairs; struct noise_keypairs keypairs;
struct endpoint endpoint; struct endpoint endpoint;
struct dst_cache endpoint_cache; struct dst_cache endpoint_cache;
rwlock_t endpoint_lock; rwlock_t endpoint_lock;
struct noise_handshake handshake; struct noise_handshake handshake;
atomic64_t last_sent_handshake; atomic64_t last_sent_handshake;
struct work_struct transmit_handshake_work, clear_peer_work; struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work;
struct cookie latest_cookie; struct cookie latest_cookie;
struct hlist_node pubkey_hash; struct hlist_node pubkey_hash;
u64 rx_bytes, tx_bytes; u64 rx_bytes, tx_bytes;
...@@ -61,9 +62,8 @@ struct wg_peer { ...@@ -61,9 +62,8 @@ struct wg_peer {
struct rcu_head rcu; struct rcu_head rcu;
struct list_head peer_list; struct list_head peer_list;
struct list_head allowedips_list; struct list_head allowedips_list;
u64 internal_id;
struct napi_struct napi; struct napi_struct napi;
bool is_dead; u64 internal_id;
}; };
struct wg_peer *wg_peer_create(struct wg_device *wg, struct wg_peer *wg_peer_create(struct wg_device *wg,
......
...@@ -9,8 +9,7 @@ struct multicore_worker __percpu * ...@@ -9,8 +9,7 @@ struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
{ {
int cpu; int cpu;
struct multicore_worker __percpu *worker = struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker);
alloc_percpu(struct multicore_worker);
if (!worker) if (!worker)
return NULL; return NULL;
...@@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) ...@@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
} }
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
bool multicore, unsigned int len) unsigned int len)
{ {
int ret; int ret;
...@@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, ...@@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
if (ret) if (ret)
return ret; return ret;
if (function) { queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue);
if (multicore) { if (!queue->worker) {
queue->worker = wg_packet_percpu_multicore_worker_alloc( ptr_ring_cleanup(&queue->ring, NULL);
function, queue); return -ENOMEM;
if (!queue->worker) {
ptr_ring_cleanup(&queue->ring, NULL);
return -ENOMEM;
}
} else {
INIT_WORK(&queue->work, function);
}
} }
return 0; return 0;
} }
void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) void wg_packet_queue_free(struct crypt_queue *queue)
{ {
if (multicore) free_percpu(queue->worker);
free_percpu(queue->worker);
WARN_ON(!__ptr_ring_empty(&queue->ring)); WARN_ON(!__ptr_ring_empty(&queue->ring));
ptr_ring_cleanup(&queue->ring, NULL); ptr_ring_cleanup(&queue->ring, NULL);
} }
#define NEXT(skb) ((skb)->prev)
#define STUB(queue) ((struct sk_buff *)&queue->empty)
/* Put @queue into its empty state.
 *
 * The queue always contains at least one node: when no packet is queued,
 * head and tail both point at the stub, which is the queue's own `empty`
 * member viewed as a struct sk_buff (see STUB()). Nodes are linked through
 * the sk_buff `prev` pointer (see NEXT()).
 */
void wg_prev_queue_init(struct prev_queue *queue)
{
/* The stub has no successor yet. */
NEXT(STUB(queue)) = NULL;
queue->head = queue->tail = STUB(queue);
/* Nothing has been dequeued-but-not-consumed via the peek helper. */
queue->peeked = NULL;
atomic_set(&queue->count, 0);
/* Compile-time check that `empty.next`/`empty.prev` sit at the same
 * offsets inside `empty` as `next`/`prev` do inside struct sk_buff, so
 * that accessing the stub through a struct sk_buff pointer touches the
 * intended fields.
 */
BUILD_BUG_ON(
offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) -
offsetof(struct prev_queue, empty) ||
offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) -
offsetof(struct prev_queue, empty));
}
/* Append @skb to @queue without touching queue->count.
 *
 * Statement order matters: @skb is first marked as having no successor,
 * then it is atomically swapped in as the new head, and only afterwards is
 * the previous head's link pointed at it. Between the swap and the final
 * store, the previous head has no successor even though it is no longer
 * queue->head; the dequeue side checks for exactly that state.
 */
static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
{
WRITE_ONCE(NEXT(skb), NULL);
WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb);
}
/* Bounded enqueue of @skb onto @queue.
 *
 * Reserves a slot by incrementing queue->count, unless the count already
 * equals MAX_QUEUED_PACKETS, and then links @skb in.
 *
 * Returns true if @skb was queued, false if the queue was at its limit (in
 * which case @skb is not touched).
 */
bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
{
if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS))
return false;
__wg_prev_queue_enqueue(queue, skb);
return true;
}
/* Remove and return the oldest packet in @queue, or NULL if none can be
 * handed out right now.
 *
 * queue->tail is written here without any atomic operation, so this must
 * only be called from one consumer at a time. A NULL return does not always
 * mean the queue is empty: it is also returned when an enqueue has swapped
 * queue->head but not yet linked its node, so the caller may need to try
 * again later.
 */
struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue)
{
struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail));
/* The stub is never returned to the caller: step over it. */
if (tail == STUB(queue)) {
/* Stub with no successor: nothing is linked behind it. */
if (!next)
return NULL;
queue->tail = next;
tail = next;
next = smp_load_acquire(&NEXT(next));
}
/* tail has a successor, so it can be unlinked and handed out. */
if (next) {
queue->tail = next;
atomic_dec(&queue->count);
return tail;
}
/* tail has no successor but is not the head: an enqueue has replaced
 * queue->head and has not yet stored the link from the old head.
 */
if (tail != READ_ONCE(queue->head))
return NULL;
/* tail is the only linked node. Re-append the stub behind it (without
 * changing the count) so that tail gains a successor and queue->tail can
 * move off it.
 */
__wg_prev_queue_enqueue(queue, STUB(queue));
next = smp_load_acquire(&NEXT(tail));
if (next) {
queue->tail = next;
atomic_dec(&queue->count);
return tail;
}
/* Still no visible successor for tail; give up for now. */
return NULL;
}
#undef NEXT
#undef STUB
...@@ -17,12 +17,13 @@ struct wg_device; ...@@ -17,12 +17,13 @@ struct wg_device;
struct wg_peer; struct wg_peer;
struct multicore_worker; struct multicore_worker;
struct crypt_queue; struct crypt_queue;
struct prev_queue;
struct sk_buff; struct sk_buff;
/* queueing.c APIs: */ /* queueing.c APIs: */
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
bool multicore, unsigned int len); unsigned int len);
void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); void wg_packet_queue_free(struct crypt_queue *queue);
struct multicore_worker __percpu * struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
...@@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online(int *next) ...@@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online(int *next)
return cpu; return cpu;
} }
void wg_prev_queue_init(struct prev_queue *queue);
/* Multi producer */
bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
/* Single consumer */
struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
/* Single consumer.
 *
 * Return the packet at the front of @queue without consuming it. The first
 * call pulls a packet out with wg_prev_queue_dequeue() and remembers it in
 * queue->peeked; further calls keep returning that same packet until
 * wg_prev_queue_drop_peeked() clears it. Returns NULL when the dequeue
 * yields nothing.
 */
static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
{
	if (!queue->peeked)
		queue->peeked = wg_prev_queue_dequeue(queue);

	return queue->peeked;
}
/* Single consumer.
 *
 * Forget the packet remembered by wg_prev_queue_peek(), so that the next
 * peek dequeues a fresh one. The packet itself is not freed here.
 */
static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
{
	queue->peeked = NULL;
}
static inline int wg_queue_enqueue_per_device_and_peer( static inline int wg_queue_enqueue_per_device_and_peer(
struct crypt_queue *device_queue, struct crypt_queue *peer_queue, struct crypt_queue *device_queue, struct prev_queue *peer_queue,
struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
{ {
int cpu; int cpu;
...@@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_device_and_peer( ...@@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_device_and_peer(
/* We first queue this up for the peer ingestion, but the consumer /* We first queue this up for the peer ingestion, but the consumer
* will wait for the state to change to CRYPTED or DEAD before. * will wait for the state to change to CRYPTED or DEAD before.
*/ */
if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
return -ENOSPC; return -ENOSPC;
/* Then we queue it up in the device queue, which consumes the /* Then we queue it up in the device queue, which consumes the
* packet as soon as it can. * packet as soon as it can.
*/ */
...@@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_device_and_peer( ...@@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
return 0; return 0;
} }
static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
struct sk_buff *skb,
enum packet_state state)
{ {
/* We take a reference, because as soon as we call atomic_set, the /* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us. * peer can be freed from below us.
...@@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, ...@@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
atomic_set_release(&PACKET_CB(skb)->state, state); atomic_set_release(&PACKET_CB(skb)->state, state);
queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
peer->internal_id), peer->device->packet_crypt_wq, &peer->transmit_packet_work);
peer->device->packet_crypt_wq, &queue->work);
wg_peer_put(peer); wg_peer_put(peer);
} }
static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
enum packet_state state)
{ {
/* We take a reference, because as soon as we call atomic_set, the /* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us. * peer can be freed from below us.
......
...@@ -444,7 +444,6 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, ...@@ -444,7 +444,6 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
int wg_packet_rx_poll(struct napi_struct *napi, int budget) int wg_packet_rx_poll(struct napi_struct *napi, int budget)
{ {
struct wg_peer *peer = container_of(napi, struct wg_peer, napi); struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
struct crypt_queue *queue = &peer->rx_queue;
struct noise_keypair *keypair; struct noise_keypair *keypair;
struct endpoint endpoint; struct endpoint endpoint;
enum packet_state state; enum packet_state state;
...@@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) ...@@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
if (unlikely(budget <= 0)) if (unlikely(budget <= 0))
return 0; return 0;
while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL &&
(state = atomic_read_acquire(&PACKET_CB(skb)->state)) != (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
PACKET_STATE_UNCRYPTED) { PACKET_STATE_UNCRYPTED) {
__ptr_ring_discard_one(&queue->ring); wg_prev_queue_drop_peeked(&peer->rx_queue);
peer = PACKET_PEER(skb);
keypair = PACKET_CB(skb)->keypair; keypair = PACKET_CB(skb)->keypair;
free = true; free = true;
...@@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct work_struct *work) ...@@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct work_struct *work)
enum packet_state state = enum packet_state state =
likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ?
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
wg_queue_enqueue_per_peer_napi(skb, state); wg_queue_enqueue_per_peer_rx(skb, state);
if (need_resched()) if (need_resched())
cond_resched(); cond_resched();
} }
...@@ -531,12 +529,10 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) ...@@ -531,12 +529,10 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
if (unlikely(READ_ONCE(peer->is_dead))) if (unlikely(READ_ONCE(peer->is_dead)))
goto err; goto err;
ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb,
&peer->rx_queue, skb, wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
wg->packet_crypt_wq,
&wg->decrypt_queue.last_cpu);
if (unlikely(ret == -EPIPE)) if (unlikely(ret == -EPIPE))
wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD);
if (likely(!ret || ret == -EPIPE)) { if (likely(!ret || ret == -EPIPE)) {
rcu_read_unlock_bh(); rcu_read_unlock_bh();
return; return;
......
...@@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) ...@@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
wg_packet_send_staged_packets(peer); wg_packet_send_staged_packets(peer);
} }
static void wg_packet_create_data_done(struct sk_buff *first, static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first)
struct wg_peer *peer)
{ {
struct sk_buff *skb, *next; struct sk_buff *skb, *next;
bool is_keepalive, data_sent = false; bool is_keepalive, data_sent = false;
...@@ -262,22 +261,19 @@ static void wg_packet_create_data_done(struct sk_buff *first, ...@@ -262,22 +261,19 @@ static void wg_packet_create_data_done(struct sk_buff *first,
void wg_packet_tx_worker(struct work_struct *work) void wg_packet_tx_worker(struct work_struct *work)
{ {
struct crypt_queue *queue = container_of(work, struct crypt_queue, struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work);
work);
struct noise_keypair *keypair; struct noise_keypair *keypair;
enum packet_state state; enum packet_state state;
struct sk_buff *first; struct sk_buff *first;
struct wg_peer *peer;
while ((first = __ptr_ring_peek(&queue->ring)) != NULL && while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL &&
(state = atomic_read_acquire(&PACKET_CB(first)->state)) != (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
PACKET_STATE_UNCRYPTED) { PACKET_STATE_UNCRYPTED) {
__ptr_ring_discard_one(&queue->ring); wg_prev_queue_drop_peeked(&peer->tx_queue);
peer = PACKET_PEER(first);
keypair = PACKET_CB(first)->keypair; keypair = PACKET_CB(first)->keypair;
if (likely(state == PACKET_STATE_CRYPTED)) if (likely(state == PACKET_STATE_CRYPTED))
wg_packet_create_data_done(first, peer); wg_packet_create_data_done(peer, first);
else else
kfree_skb_list(first); kfree_skb_list(first);
...@@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct work_struct *work) ...@@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct work_struct *work)
break; break;
} }
} }
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, wg_queue_enqueue_per_peer_tx(first, state);
state);
if (need_resched()) if (need_resched())
cond_resched(); cond_resched();
} }
} }
static void wg_packet_create_data(struct sk_buff *first) static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
{ {
struct wg_peer *peer = PACKET_PEER(first);
struct wg_device *wg = peer->device; struct wg_device *wg = peer->device;
int ret = -EINVAL; int ret = -EINVAL;
...@@ -323,13 +317,10 @@ static void wg_packet_create_data(struct sk_buff *first) ...@@ -323,13 +317,10 @@ static void wg_packet_create_data(struct sk_buff *first)
if (unlikely(READ_ONCE(peer->is_dead))) if (unlikely(READ_ONCE(peer->is_dead)))
goto err; goto err;
ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
&peer->tx_queue, first, wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
wg->packet_crypt_wq,
&wg->encrypt_queue.last_cpu);
if (unlikely(ret == -EPIPE)) if (unlikely(ret == -EPIPE))
wg_queue_enqueue_per_peer(&peer->tx_queue, first, wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
PACKET_STATE_DEAD);
err: err:
rcu_read_unlock_bh(); rcu_read_unlock_bh();
if (likely(!ret || ret == -EPIPE)) if (likely(!ret || ret == -EPIPE))
...@@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) ...@@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
packets.prev->next = NULL; packets.prev->next = NULL;
wg_peer_get(keypair->entry.peer); wg_peer_get(keypair->entry.peer);
PACKET_CB(packets.next)->keypair = keypair; PACKET_CB(packets.next)->keypair = keypair;
wg_packet_create_data(packets.next); wg_packet_create_data(peer, packets.next);
return; return;
out_invalid: out_invalid:
......
...@@ -53,7 +53,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, ...@@ -53,7 +53,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
fl.saddr, RT_SCOPE_HOST))) { fl.saddr, RT_SCOPE_HOST))) {
endpoint->src4.s_addr = 0; endpoint->src4.s_addr = 0;
*(__force __be32 *)&endpoint->src_if4 = 0; endpoint->src_if4 = 0;
fl.saddr = 0; fl.saddr = 0;
if (cache) if (cache)
dst_cache_reset(cache); dst_cache_reset(cache);
...@@ -63,7 +63,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, ...@@ -63,7 +63,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
rt->dst.dev->ifindex != endpoint->src_if4)))) { rt->dst.dev->ifindex != endpoint->src_if4)))) {
endpoint->src4.s_addr = 0; endpoint->src4.s_addr = 0;
*(__force __be32 *)&endpoint->src_if4 = 0; endpoint->src_if4 = 0;
fl.saddr = 0; fl.saddr = 0;
if (cache) if (cache)
dst_cache_reset(cache); dst_cache_reset(cache);
...@@ -71,7 +71,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, ...@@ -71,7 +71,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
ip_rt_put(rt); ip_rt_put(rt);
rt = ip_route_output_flow(sock_net(sock), &fl, sock); rt = ip_route_output_flow(sock_net(sock), &fl, sock);
} }
if (unlikely(IS_ERR(rt))) { if (IS_ERR(rt)) {
ret = PTR_ERR(rt); ret = PTR_ERR(rt);
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret); wg->dev->name, &endpoint->addr, ret);
...@@ -138,7 +138,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, ...@@ -138,7 +138,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
} }
dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
NULL); NULL);
if (unlikely(IS_ERR(dst))) { if (IS_ERR(dst)) {
ret = PTR_ERR(dst); ret = PTR_ERR(dst);
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret); wg->dev->name, &endpoint->addr, ret);
......
...@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } ...@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
sleep() { read -t "$1" -N 1 || true; } sleep() { read -t "$1" -N 1 || true; }
waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
...@@ -141,6 +141,19 @@ tests() { ...@@ -141,6 +141,19 @@ tests() {
n2 iperf3 -s -1 -B fd00::2 & n2 iperf3 -s -1 -B fd00::2 &
waitiperf $netns2 $! waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
# TCP over IPv4, in parallel
for max in 4 5 50; do
local pids=( )
for ((i=0; i < max; ++i)) do
n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
done
for ((i=0; i < max; ++i)) do
n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
done
wait "${pids[@]}"
done
} }
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment