Commit 74b4bed9 authored by David S. Miller's avatar David S. Miller

Merge branch 'net-permit-skb_segment-on-head_frag-frag_list-skb'

Yonghong Song says:

====================
net: permit skb_segment on head_frag frag_list skb

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.
 ...
    3665                 while (pos < offset + len) {
    3666                         if (i >= nfrags) {
    3667                                 BUG_ON(skb_headlen(list_skb));
 ...

The triggering input skb has the following properties:
    list_skb = skb->frag_list;
    skb->nfrags != NULL && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.

Patch #1 provides a simple solution to avoid BUG_ON. If
list_skb->head_frag is true, its page-backed frag will
be processed before the list_skb->frags.
Patch #2 provides a test case in test_bpf module which
constructs a skb and calls skb_segment() directly. The test
case is able to trigger the BUG_ON without Patch #1.

The patch has been tested in the following setup:
  ipv6_host <-> nat_server <-> ipv4_host
where nat_server has a bpf program doing ipv4<->ipv6
translation and forwarding through clsact hook
bpf_skb_change_proto.

Changelog:
v5 -> v6:
  . Added back missed BUG_ON(!nfrags) for zero
    skb_headlen(skb) case, plus a couple of
    cosmetic changes, from Alexander.
v4 -> v5:
  . Replace local variable head_frag with
    a static inline function skb_head_frag_to_page_desc
    which gets the head_frag on-demand. This makes
    code more readable and also does not increase
    the stack size, from Alexander.
  . Remove the "if(nfrags)" guard for skb_orphan_frags
    and skb_zerocopy_clone as I found that they can
    handle zero-frag skb (with non-zero skb_headlen(skb))
    properly.
  . Properly release segment list from skb_segment()
    in the test, from Eric.
v3 -> v4:
  . Remove dynamic memory allocation and use rewinding
    for both index and frag to remove one branch in fast path,
    from Alexander.
  . Fix a bunch of issues in test_bpf skb_segment() test,
    including proper way to allocate skb, proper function
    argument for skb_add_rx_frag and not freeint skb, etc.,
    from Eric.
v2 -> v3:
  . Use starting frag index -1 (instead of 0) to
    special process head_frag before other frags in the skb,
    from Alexander Duyck.
v1 -> v2:
  . Removed never-hit BUG_ON, spotted by Linyu Yuan.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 03740165 76db8087
...@@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id) ...@@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id)
return test_id < test_range[0] || test_id > test_range[1]; return test_id < test_range[0] || test_id > test_range[1];
} }
static __init struct sk_buff *build_test_skb(void)
{
u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
struct sk_buff *skb[2];
struct page *page[2];
int i, data_size = 8;
for (i = 0; i < 2; i++) {
page[i] = alloc_page(GFP_KERNEL);
if (!page[i]) {
if (i == 0)
goto err_page0;
else
goto err_page1;
}
/* this will set skb[i]->head_frag */
skb[i] = dev_alloc_skb(headroom + data_size);
if (!skb[i]) {
if (i == 0)
goto err_skb0;
else
goto err_skb1;
}
skb_reserve(skb[i], headroom);
skb_put(skb[i], data_size);
skb[i]->protocol = htons(ETH_P_IP);
skb_reset_network_header(skb[i]);
skb_set_mac_header(skb[i], -ETH_HLEN);
skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
}
/* setup shinfo */
skb_shinfo(skb[0])->gso_size = 1448;
skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb[0])->gso_segs = 0;
skb_shinfo(skb[0])->frag_list = skb[1];
/* adjust skb[0]'s len */
skb[0]->len += skb[1]->len;
skb[0]->data_len += skb[1]->data_len;
skb[0]->truesize += skb[1]->truesize;
return skb[0];
err_skb1:
__free_page(page[1]);
err_page1:
kfree_skb(skb[0]);
err_skb0:
__free_page(page[0]);
err_page0:
return NULL;
}
static __init int test_skb_segment(void)
{
netdev_features_t features;
struct sk_buff *skb, *segs;
int ret = -1;
features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM;
features |= NETIF_F_RXCSUM;
skb = build_test_skb();
if (!skb) {
pr_info("%s: failed to build_test_skb", __func__);
goto done;
}
segs = skb_segment(skb, features);
if (segs) {
kfree_skb_list(segs);
ret = 0;
pr_info("%s: success in skb_segment!", __func__);
} else {
pr_info("%s: failed in skb_segment!", __func__);
}
kfree_skb(skb);
done:
return ret;
}
static __init int test_bpf(void) static __init int test_bpf(void)
{ {
int i, err_cnt = 0, pass_cnt = 0; int i, err_cnt = 0, pass_cnt = 0;
...@@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void) ...@@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void)
return ret; return ret;
ret = test_bpf(); ret = test_bpf();
destroy_bpf_tests(); destroy_bpf_tests();
return ret; if (ret)
return ret;
return test_skb_segment();
} }
static void __exit test_bpf_exit(void) static void __exit test_bpf_exit(void)
......
...@@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) ...@@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
} }
EXPORT_SYMBOL_GPL(skb_pull_rcsum); EXPORT_SYMBOL_GPL(skb_pull_rcsum);
static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
{
skb_frag_t head_frag;
struct page *page;
page = virt_to_head_page(frag_skb->head);
head_frag.page.p = page;
head_frag.page_offset = frag_skb->data -
(unsigned char *)page_address(page);
head_frag.size = skb_headlen(frag_skb);
return head_frag;
}
/** /**
* skb_segment - Perform protocol segmentation on skb. * skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment * @head_skb: buffer to segment
...@@ -3664,15 +3677,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, ...@@ -3664,15 +3677,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
while (pos < offset + len) { while (pos < offset + len) {
if (i >= nfrags) { if (i >= nfrags) {
BUG_ON(skb_headlen(list_skb));
i = 0; i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags; nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags; frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb; frag_skb = list_skb;
if (!skb_headlen(list_skb)) {
BUG_ON(!nfrags);
} else {
BUG_ON(!list_skb->head_frag);
BUG_ON(!nfrags); /* to make room for head_frag. */
i--;
frag--;
}
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb, skb_zerocopy_clone(nskb, frag_skb,
GFP_ATOMIC)) GFP_ATOMIC))
...@@ -3689,7 +3706,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, ...@@ -3689,7 +3706,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
goto err; goto err;
} }
*nskb_frag = *frag; *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag); __skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag); size = skb_frag_size(nskb_frag);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment