Commit 193cdc4a authored by David S. Miller

Merge branch 'udpv6_lockless_send'

Vladislav Yasevich says:

====================
ipv6: Add lockless UDP send path

This series introduces a lockless UDPv6 send path similar to
what Herbert Xu did for IPv4 a while ago.

There are some differences from IPv4.  IPv6 caching for the flow
label is a bit different, and it also requires another
cork structure that holds the IPv6 ancillary data.

Please take a look.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ba0c39cb 32dce968
...@@ -125,6 +125,12 @@ struct ipv6_mc_socklist; ...@@ -125,6 +125,12 @@ struct ipv6_mc_socklist;
struct ipv6_ac_socklist; struct ipv6_ac_socklist;
struct ipv6_fl_socklist; struct ipv6_fl_socklist;
struct inet6_cork {
struct ipv6_txoptions *opt;
u8 hop_limit;
u8 tclass;
};
/** /**
* struct ipv6_pinfo - ipv6 private area * struct ipv6_pinfo - ipv6 private area
* *
...@@ -217,11 +223,7 @@ struct ipv6_pinfo { ...@@ -217,11 +223,7 @@ struct ipv6_pinfo {
struct ipv6_txoptions *opt; struct ipv6_txoptions *opt;
struct sk_buff *pktoptions; struct sk_buff *pktoptions;
struct sk_buff *rxpmtu; struct sk_buff *rxpmtu;
struct { struct inet6_cork cork;
struct ipv6_txoptions *opt;
u8 hop_limit;
u8 tclass;
} cork;
}; };
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
......
...@@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk); ...@@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk);
void ip6_flush_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk);
int ip6_send_skb(struct sk_buff *skb);
struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue,
struct inet_cork_full *cork,
struct inet6_cork *v6_cork);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
int hlimit, int tclass, struct ipv6_txoptions *opt,
struct flowi6 *fl6, struct rt6_info *rt,
unsigned int flags, int dontfrag);
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
{
return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork,
&inet6_sk(sk)->cork);
}
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst); const struct in6_addr *final_dst);
......
This diff is collapsed.
...@@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
{ {
unsigned int offset; unsigned int offset;
struct udphdr *uh = udp_hdr(skb); struct udphdr *uh = udp_hdr(skb);
struct sk_buff *frags = skb_shinfo(skb)->frag_list;
__wsum csum = 0; __wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) { if (!frags) {
/* Only one fragment on the socket. */ /* Only one fragment on the socket. */
skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check); skb->csum_offset = offsetof(struct udphdr, check);
...@@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
skb_queue_walk(&sk->sk_write_queue, skb) { do {
csum = csum_add(csum, skb->csum); csum = csum_add(csum, frags->csum);
} } while ((frags = frags->next));
uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
csum); csum);
...@@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, ...@@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
* Sending * Sending
*/ */
static int udp_v6_push_pending_frames(struct sock *sk) static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
{ {
struct sk_buff *skb; struct sock *sk = skb->sk;
struct udphdr *uh; struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi6 *fl6;
int err = 0; int err = 0;
int is_udplite = IS_UDPLITE(sk); int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0; __wsum csum = 0;
int offset = skb_transport_offset(skb);
if (up->pending == AF_INET) int len = skb->len - offset;
return udp_push_pending_frames(sk);
fl6 = &inet->cork.fl.u.ip6;
/* Grab the skbuff where UDP header space exists. */
skb = skb_peek(&sk->sk_write_queue);
if (skb == NULL)
goto out;
/* /*
* Create a UDP header * Create a UDP header
...@@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) ...@@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk)
uh = udp_hdr(skb); uh = udp_hdr(skb);
uh->source = fl6->fl6_sport; uh->source = fl6->fl6_sport;
uh->dest = fl6->fl6_dport; uh->dest = fl6->fl6_dport;
uh->len = htons(up->len); uh->len = htons(len);
uh->check = 0; uh->check = 0;
if (is_udplite) if (is_udplite)
csum = udplite_csum_outgoing(sk, skb); csum = udplite_csum(skb);
else if (up->no_check6_tx) { /* UDP csum disabled */ else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
goto send; goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
up->len);
goto send; goto send;
} else } else
csum = udp_csum_outgoing(sk, skb); csum = udp_csum(skb);
/* add protocol-dependent pseudo-header */ /* add protocol-dependent pseudo-header */
uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
up->len, fl6->flowi6_proto, csum); len, fl6->flowi6_proto, csum);
if (uh->check == 0) if (uh->check == 0)
uh->check = CSUM_MANGLED_0; uh->check = CSUM_MANGLED_0;
send: send:
err = ip6_push_pending_frames(sk); err = ip6_send_skb(skb);
if (err) { if (err) {
if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
UDP6_INC_STATS_USER(sock_net(sk), UDP6_INC_STATS_USER(sock_net(sk),
...@@ -1082,6 +1071,30 @@ static int udp_v6_push_pending_frames(struct sock *sk) ...@@ -1082,6 +1071,30 @@ static int udp_v6_push_pending_frames(struct sock *sk)
} else } else
UDP6_INC_STATS_USER(sock_net(sk), UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_OUTDATAGRAMS, is_udplite); UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udp_sock *up = udp_sk(sk);
struct flowi6 fl6;
int err = 0;
if (up->pending == AF_INET)
return udp_push_pending_frames(sk);
/* ip6_finish_skb will release the cork, so make a copy of
* fl6 here.
*/
fl6 = inet_sk(sk)->cork.fl.u.ip6;
skb = ip6_finish_skb(sk);
if (!skb)
goto out;
err = udp_v6_send_skb(skb, &fl6);
out: out:
up->len = 0; up->len = 0;
up->pending = 0; up->pending = 0;
...@@ -1164,6 +1177,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1164,6 +1177,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (len > INT_MAX - sizeof(struct udphdr)) if (len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE; return -EMSGSIZE;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
if (up->pending) { if (up->pending) {
/* /*
* There are pending frames. * There are pending frames.
...@@ -1294,6 +1308,20 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1294,6 +1308,20 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
goto do_confirm; goto do_confirm;
back_from_confirm: back_from_confirm:
/* Lockless fast path for the non-corking case */
if (!corkreq) {
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
msg->msg_flags, dontfrag);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
goto release_dst;
}
lock_sock(sk); lock_sock(sk);
if (unlikely(up->pending)) { if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */ /* The socket is already corked while preparing it. */
...@@ -1311,7 +1339,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1311,7 +1339,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (dontfrag < 0) if (dontfrag < 0)
dontfrag = np->dontfrag; dontfrag = np->dontfrag;
up->len += ulen; up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg, ulen, err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6, sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst, (struct rt6_info *)dst,
...@@ -1323,6 +1350,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1323,6 +1350,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0; up->pending = 0;
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
release_dst:
if (dst) { if (dst) {
if (connected) { if (connected) {
ip6_dst_store(sk, dst, ip6_dst_store(sk, dst,
...@@ -1339,9 +1371,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ...@@ -1339,9 +1371,6 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
dst = NULL; dst = NULL;
} }
if (err > 0)
err = np->recverr ? net_xmit_errno(err) : 0;
release_sock(sk);
out: out:
dst_release(dst); dst_release(dst);
fl6_sock_release(flowlabel); fl6_sock_release(flowlabel);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment