Commit ee93b1ff authored by Jakub Kicinski

Merge branch 'net-gro-reduce-extension-header-parsing-overhead'

Richard Gobert says:

====================
net: gro: reduce extension header parsing overhead

This series attempts to reduce the parsing overhead of IPv6 extension
headers in GRO and GSO, by removing extension header specific code and
enabling the frag0 fast path.

The following changes were made:

 - Removed some unnecessary HBH conditionals by adding HBH offload
   to inet6_offloads
 - Added a utility function to support frag0 fast path in ipv6_gro_receive
 - Added selftests for IPv6 packets with extension headers in GRO

v2: https://lore.kernel.org/netdev/127b8199-1cd4-42d7-9b2b-875abaad93fe@gmail.com/
v1: https://lore.kernel.org/netdev/f4eff69d-3917-4c42-8c6b-d09597ac4437@gmail.com/
====================

Link: https://lore.kernel.org/r/ac6fb684-c00e-449c-92c3-99358a927ade@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents cb420106 4e321d59
......@@ -16,6 +16,10 @@ static const struct net_offload dstopt_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
/*
 * Offload descriptor registered for IPPROTO_HOPOPTS by
 * ipv6_exthdrs_offload_init(). It sets only the INET6_PROTO_GSO_EXTHDR
 * flag, the same flag the exthdr walkers test before stepping over a header.
 */
static const struct net_offload hbh_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
int __init ipv6_exthdrs_offload_init(void)
{
int ret;
......@@ -28,9 +32,16 @@ int __init ipv6_exthdrs_offload_init(void)
if (ret)
goto out_rt;
ret = inet6_add_offload(&hbh_offload, IPPROTO_HOPOPTS);
if (ret)
goto out_dstopts;
out:
return ret;
out_dstopts:
inet6_del_offload(&dstopt_offload, IPPROTO_DSTOPTS);
out_rt:
inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
goto out;
......
......@@ -37,6 +37,40 @@
INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
})
/*
 * Step over the chain of IPv6 extension headers that begins at @off.
 *
 * @skb:   packet currently being processed
 * @off:   offset of the first candidate extension header
 * @proto: protocol number of the header located at @off
 *
 * A header is stepped over only while inet6_offloads[] holds an entry for
 * its protocol that carries INET6_PROTO_GSO_EXTHDR, and while
 * skb_gro_header() returns a non-NULL pointer both for the fixed
 * struct ipv6_opt_hdr and for the full ipv6_optlen() bytes of the header.
 * When the walk stops, skb_gro_pull() is called with the distance between
 * the final offset and skb_network_offset(skb).
 *
 * Returns the protocol number at which the walk stopped.
 */
static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
{
	const struct net_offload *offload;
	struct ipv6_opt_hdr *exthdr;
	int exthdr_len;

	while (1) {
		offload = rcu_dereference(inet6_offloads[proto]);
		if (unlikely(!offload))
			break;
		if ((offload->flags & INET6_PROTO_GSO_EXTHDR) == 0)
			break;

		/* Fetch the fixed part first; the length is derived from it. */
		exthdr = skb_gro_header(skb, off + sizeof(*exthdr), off);
		if (unlikely(!exthdr))
			break;

		/* Fetch again, this time covering the whole header. */
		exthdr_len = ipv6_optlen(exthdr);
		exthdr = skb_gro_header(skb, off + exthdr_len, off);
		if (unlikely(!exthdr))
			break;

		off += exthdr_len;
		proto = exthdr->nexthdr;
	}

	skb_gro_pull(skb, off - skb_network_offset(skb));

	return proto;
}
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
......@@ -45,15 +79,13 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
struct ipv6_opt_hdr *opth;
int len;
if (proto != NEXTHDR_HOP) {
ops = rcu_dereference(inet6_offloads[proto]);
ops = rcu_dereference(inet6_offloads[proto]);
if (unlikely(!ops))
break;
if (unlikely(!ops))
break;
if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
break;
}
if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
break;
if (unlikely(!pskb_may_pull(skb, 8)))
break;
......@@ -171,13 +203,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
proto = iph->nexthdr;
for (;;) {
if (proto != NEXTHDR_HOP) {
*opps = rcu_dereference(inet6_offloads[proto]);
if (unlikely(!(*opps)))
break;
if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
break;
}
*opps = rcu_dereference(inet6_offloads[proto]);
if (unlikely(!(*opps)))
break;
if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
break;
opth = (void *)opth + optlen;
optlen = ipv6_optlen(opth);
len += optlen;
......@@ -206,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
goto out;
skb_set_network_header(skb, off);
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
flush += ntohs(iph->payload_len) != skb_gro_len(skb);
flush += ntohs(iph->payload_len) != skb->len - hlen;
proto = iph->nexthdr;
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
pskb_pull(skb, skb_gro_offset(skb));
skb_gro_frag0_invalidate(skb);
proto = ipv6_gso_pull_exthdrs(skb, proto);
skb_gro_pull(skb, -skb_transport_offset(skb));
skb_reset_transport_header(skb);
__skb_push(skb, skb_gro_offset(skb));
proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
iph = ipv6_hdr(skb);
iph = skb_gro_network_header(skb);
} else {
skb_gro_pull(skb, sizeof(*iph));
}
skb_set_transport_header(skb, skb_gro_offset(skb));
NAPI_GRO_CB(skb)->proto = proto;
flush--;
......
......@@ -71,6 +71,12 @@
#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
/* Size in bytes of the extension header the tests build (hdrlen == 0) */
#define MIN_EXTHDR_SIZE 8
/* Two different 6-byte fillers for the bytes that follow nexthdr/hdrlen */
#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
/* IPv6 extension header length in bytes: (hdrlen + 1) units of 8 octets */
#define ipv6_optlen(p) (((p)->hdrlen + 1) * 8)
/* Break the build when condition is true by asking for a negative array size */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[(condition) ? -1 : 1]))
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
......@@ -104,7 +110,7 @@ static void setup_sock_filter(int fd)
const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
const int ethproto_off = offsetof(struct ethhdr, h_proto);
int optlen = 0;
int ipproto_off;
int ipproto_off, opt_ipproto_off;
int next_off;
if (proto == PF_INET)
......@@ -116,14 +122,30 @@ static void setup_sock_filter(int fd)
if (strcmp(testname, "ip") == 0) {
if (proto == PF_INET)
optlen = sizeof(struct ip_timestamp);
else
optlen = sizeof(struct ip6_frag);
else {
BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
/* same size for HBH and Fragment extension header types */
optlen = MIN_EXTHDR_SIZE;
opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ offsetof(struct ip6_ext, ip6e_nxt);
}
}
/* this filter validates the following:
* - packet is IPv4/IPv6 according to the running test.
* - packet is TCP. Also handles the case of one extension header and then TCP.
* - checks that the packet's TCP dport equals DPORT. Also handles the case of one
* extension header and then TCP.
*/
struct sock_filter filter[] = {
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
......@@ -576,6 +598,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt)
iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
}
/*
 * Build in @optpkt a copy of the packet in @buf with one extension header of
 * MIN_EXTHDR_SIZE bytes inserted at tcp_offset.
 *
 * @buf:          source packet; tcp_offset bytes of headers followed by
 *                sizeof(struct tcphdr) + PAYLOAD_LEN bytes are read from it
 * @optpkt:       destination buffer; must hold MIN_EXTHDR_SIZE bytes more
 *                than what is read from @buf
 * @exthdr_type:  value stored in the IPv6 header's nexthdr field
 * @ext_payload:  bytes copied right after the extension header's
 *                nexthdr/hdrlen fields
 *
 * The extension header gets hdrlen 0 and nexthdr IPPROTO_TCP, and the IPv6
 * payload_len is increased by MIN_EXTHDR_SIZE.
 */
static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)(optpkt + ETH_HLEN);
	struct ipv6_opt_hdr *opth = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
	void *tcp_dst = optpkt + tcp_offset + MIN_EXTHDR_SIZE;

	/* Everything in front of the TCP header is copied unchanged ... */
	memcpy(optpkt, buf, tcp_offset);
	/* ... then patched to announce the extension header and its size. */
	ip6h->nexthdr = exthdr_type;
	ip6h->payload_len = htons(ntohs(ip6h->payload_len) + MIN_EXTHDR_SIZE);

	/* The extension header itself: two fixed bytes plus caller's filler. */
	opth->nexthdr = IPPROTO_TCP;
	opth->hdrlen = 0;
	memcpy((char *)(opth + 1), ext_payload, MIN_EXTHDR_SIZE - sizeof(*opth));

	/* TCP header and data land right behind the extension header. */
	memcpy(tcp_dst, buf + tcp_offset, sizeof(struct tcphdr) + PAYLOAD_LEN);
}
/*
 * Send two packets that each carry one IPPROTO_HOPOPTS extension header.
 *
 * @fd:        socket handed to write_packet()
 * @daddr:     destination address handed to write_packet()
 * @ext_data1: extension header filler for the first packet
 * @ext_data2: extension header filler for the second packet
 *
 * The second create_packet() argument is 0 for the first packet and
 * PAYLOAD_LEN for the second (presumably the sequence offset - confirm
 * against create_packet()).
 */
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
	char *ext_data[2];
	int i;

	ext_data[0] = ext_data1;
	ext_data[1] = ext_data2;

	for (i = 0; i < 2; i++) {
		create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
		add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data[i]);
		write_packet(fd, exthdr_pck,
			     total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
	}
}
/* IPv4 options shouldn't coalesce */
static void send_ip_options(int fd, struct sockaddr_ll *daddr)
{
......@@ -697,7 +752,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr)
create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
write_packet(fd, buf, bufpkt_len, daddr);
}
sleep(1);
create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
memset(extpkt, 0, extpkt_len);
......@@ -760,6 +815,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
vlog("}, Total %d packets\nReceived {", correct_num_pkts);
while (1) {
ip_ext_len = 0;
pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
if (pkt_size < 0)
error(1, errno, "could not receive");
......@@ -767,7 +823,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
if (iph->version == 4)
ip_ext_len = (iph->ihl - 5) * 4;
else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
ip_ext_len = sizeof(struct ip6_frag);
ip_ext_len = MIN_EXTHDR_SIZE;
tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
......@@ -880,7 +936,21 @@ static void gro_sender(void)
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (proto == PF_INET6) {
sleep(1);
send_fragment6(txfd, &daddr);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
sleep(1);
/* send IPv6 packets with ext header with same payload */
send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
sleep(1);
/* send IPv6 packets with ext header with different payload */
send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
}
} else if (strcmp(testname, "large") == 0) {
......@@ -997,6 +1067,17 @@ static void gro_receiver(void)
*/
printf("fragmented ip6 doesn't coalesce: ");
correct_payload[0] = PAYLOAD_LEN * 2;
correct_payload[1] = PAYLOAD_LEN;
correct_payload[2] = PAYLOAD_LEN;
check_recv_pkts(rxfd, correct_payload, 3);
printf("ipv6 with ext header does coalesce: ");
correct_payload[0] = PAYLOAD_LEN * 2;
check_recv_pkts(rxfd, correct_payload, 1);
printf("ipv6 with ext header with different payloads doesn't coalesce: ");
correct_payload[0] = PAYLOAD_LEN;
correct_payload[1] = PAYLOAD_LEN;
check_recv_pkts(rxfd, correct_payload, 2);
}
} else if (strcmp(testname, "large") == 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment