Commit 97550f6f authored by Willem de Bruijn's avatar Willem de Bruijn Committed by Jakub Kicinski

net: compound page support in skb_seq_read

skb_seq_read iterates over an skb, returning pointer and length of
the next data range with each call.

It relies on kmap_atomic to access highmem pages when needed.

An skb frag may be backed by a compound page, but kmap_atomic maps
only a single page. There are not enough kmap slots to always map all
pages concurrently.

Instead, if kmap_atomic is needed, iterate over each page.

As this increases the number of calls, avoid this unless needed.
The necessary condition is captured in skb_frag_must_loop.

I tried to make the change as obvious as possible. It should be easy
to verify that nothing changes if skb_frag_must_loop returns false.

Tested:
  On an x86 platform with
    CONFIG_HIGHMEM=y
    CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y
    CONFIG_NETFILTER_XT_MATCH_STRING=y

  Run
    ip link set dev lo mtu 1500
    iptables -A OUTPUT -m string --string 'badstring' -algo bm -j ACCEPT
    dd if=/dev/urandom of=in bs=1M count=20
    nc -l -p 8000 > /dev/null &
    nc -w 1 -q 0 localhost 8000 < in
Signed-off-by: default avatarWillem de Bruijn <willemb@google.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 29766bcf
...@@ -1203,6 +1203,7 @@ struct skb_seq_state { ...@@ -1203,6 +1203,7 @@ struct skb_seq_state {
struct sk_buff *root_skb; struct sk_buff *root_skb;
struct sk_buff *cur_skb; struct sk_buff *cur_skb;
__u8 *frag_data; __u8 *frag_data;
__u32 frag_off;
}; };
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
......
...@@ -3442,6 +3442,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, ...@@ -3442,6 +3442,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
st->root_skb = st->cur_skb = skb; st->root_skb = st->cur_skb = skb;
st->frag_idx = st->stepped_offset = 0; st->frag_idx = st->stepped_offset = 0;
st->frag_data = NULL; st->frag_data = NULL;
st->frag_off = 0;
} }
EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_prepare_seq_read);
...@@ -3496,14 +3497,27 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, ...@@ -3496,14 +3497,27 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
st->stepped_offset += skb_headlen(st->cur_skb); st->stepped_offset += skb_headlen(st->cur_skb);
while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
unsigned int pg_idx, pg_off, pg_sz;
frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
block_limit = skb_frag_size(frag) + st->stepped_offset;
pg_idx = 0;
pg_off = skb_frag_off(frag);
pg_sz = skb_frag_size(frag);
if (skb_frag_must_loop(skb_frag_page(frag))) {
pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT;
pg_off = offset_in_page(pg_off + st->frag_off);
pg_sz = min_t(unsigned int, pg_sz - st->frag_off,
PAGE_SIZE - pg_off);
}
block_limit = pg_sz + st->stepped_offset;
if (abs_offset < block_limit) { if (abs_offset < block_limit) {
if (!st->frag_data) if (!st->frag_data)
st->frag_data = kmap_atomic(skb_frag_page(frag)); st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx);
*data = (u8 *) st->frag_data + skb_frag_off(frag) + *data = (u8 *)st->frag_data + pg_off +
(abs_offset - st->stepped_offset); (abs_offset - st->stepped_offset);
return block_limit - abs_offset; return block_limit - abs_offset;
...@@ -3514,8 +3528,12 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, ...@@ -3514,8 +3528,12 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
st->frag_data = NULL; st->frag_data = NULL;
} }
st->frag_idx++; st->stepped_offset += pg_sz;
st->stepped_offset += skb_frag_size(frag); st->frag_off += pg_sz;
if (st->frag_off == skb_frag_size(frag)) {
st->frag_off = 0;
st->frag_idx++;
}
} }
if (st->frag_data) { if (st->frag_data) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment