Commit 07e1d6b3 authored by David S. Miller's avatar David S. Miller

Merge branch 'skb_expand_head'

Vasily Averin says:

====================
skbuff: introduce skb_expand_head()

currently if skb does not have enough headroom skb_realloc_headrom is called.
It is not optimal because it creates new skb.

this patch set introduces new helper skb_expand_head()
Unlike skb_realloc_headroom, it does not allocate a new skb if possible;
copies skb->sk on new skb when as needed and frees original skb in case of failures.

This helps to simplify ip[6]_finish_output2(), ip6_xmit() and few other
functions in vrf, ax25 and bpf.

There are few other cases where this helper can be used
but it requires an additional investigations.

v3 changes:
 - ax25 compilation warning fixed
 - v5.14-rc4 rebase
 - now it does not depend on non-committed pathces

v2 changes:
 - helper's name was changed to skb_expand_head
 - fixed few mistakes inside skb_expand_head():
    skb_set_owner_w should set sk on nskb
    kfree was replaced by kfree_skb()
    improved warning message
 - added minor refactoring in changed functions in vrf and bpf patches
 - removed kfree_skb() in ax25_rt_build_path caller ax25_ip_xmit
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents fa976624 a1e975e1
......@@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
int ret = -EINVAL;
nf_reset_ct(skb);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
if (!skb2) {
ret = -ENOMEM;
goto err;
skb = skb_expand_head(skb, hh_len);
if (!skb) {
skb->dev->stats.tx_errors++;
return -ENOMEM;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
}
rcu_read_lock_bh();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
int ret;
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
......@@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
}
rcu_read_unlock_bh();
err:
vrf_tx_error(skb->dev, skb);
return ret;
return -EINVAL;
}
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
......
......@@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
......
......@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
skb_pull(skb, AX25_KISS_HEADER_LEN);
if (digipeat != NULL) {
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
kfree_skb(skb);
if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
goto put;
}
skb = ourskb;
}
......
......@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
{
struct sk_buff *skbn;
unsigned char *ptr;
int headroom;
......@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
headroom = ax25_addr_size(ax25->digipeat);
if (skb_headroom(skb) < headroom) {
if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
if (unlikely(skb_headroom(skb) < headroom)) {
skb = skb_expand_head(skb, headroom);
if (!skb) {
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
kfree_skb(skb);
return;
}
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
consume_skb(skb);
skb = skbn;
}
ptr = skb_push(skb, headroom);
......
......@@ -441,24 +441,17 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
ax25_address *dest, ax25_digi *digi)
{
struct sk_buff *skbn;
unsigned char *bp;
int len;
len = digi->ndigi * AX25_ADDR_LEN;
if (skb_headroom(skb) < len) {
if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
if (unlikely(skb_headroom(skb) < len)) {
skb = skb_expand_head(skb, len);
if (!skb) {
printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
return NULL;
}
if (skb->sk != NULL)
skb_set_owner_w(skbn, skb->sk);
consume_skb(skb);
skb = skbn;
}
bp = skb_push(skb, len);
......
......@@ -2180,18 +2180,10 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, hh_len);
if (unlikely(!skb2)) {
kfree_skb(skb);
skb = skb_expand_head(skb, hh_len);
if (!skb)
return -ENOMEM;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
}
rcu_read_lock_bh();
if (!nh) {
......@@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
rcu_read_unlock_bh();
if (dst)
IP6_INC_STATS(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
......@@ -2287,18 +2278,10 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, hh_len);
if (unlikely(!skb2)) {
kfree_skb(skb);
skb = skb_expand_head(skb, hh_len);
if (!skb)
return -ENOMEM;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
}
rcu_read_lock_bh();
if (!nh) {
......
......@@ -1789,6 +1789,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
* @headroom: needed headroom
*
* Unlike skb_realloc_headroom, this one does not allocate a new skb
* if possible; copies skb->sk to new skb as needed
* and frees original skb in case of failures.
*
* It expect increased headroom and generates warning otherwise.
*/
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
{
int delta = headroom - skb_headroom(skb);
if (WARN_ONCE(delta <= 0,
"%s is expecting an increase in the headroom", __func__))
return skb;
/* pskb_expand_head() might crash, if skb is shared */
if (skb_shared(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
if (likely(nskb)) {
if (skb->sk)
skb_set_owner_w(nskb, skb->sk);
consume_skb(skb);
} else {
kfree_skb(skb);
}
skb = nskb;
}
if (skb &&
pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
kfree_skb(skb);
skb = NULL;
}
return skb;
}
EXPORT_SYMBOL(skb_expand_head);
/**
* skb_copy_expand - copy and expand sk_buff
* @skb: buffer to copy
......
......@@ -198,20 +198,11 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
if (!skb2) {
kfree_skb(skb);
skb = skb_expand_head(skb, hh_len);
if (!skb)
return -ENOMEM;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
}
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
......
......@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
int delta = hh_len - skb_headroom(skb);
const struct in6_addr *nexthop;
const struct in6_addr *daddr, *nexthop;
struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
/* Be paranoid, rather than too clever. */
if (unlikely(delta > 0) && dev->header_ops) {
/* pskb_expand_head() might crash, if skb is shared */
if (skb_shared(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
if (likely(nskb)) {
if (skb->sk)
skb_set_owner_w(nskb, skb->sk);
consume_skb(skb);
} else {
kfree_skb(skb);
}
skb = nskb;
}
if (skb &&
pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
kfree_skb(skb);
skb = NULL;
}
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOMEM;
}
}
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
hdr = ipv6_hdr(skb);
daddr = &hdr->daddr;
if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
......@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
......@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
IPV6_ADDR_SCOPE_NODELOCAL &&
if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
......@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
......@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_unlock_bh();
IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
......@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
......@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(skb_headroom(skb) < head_room)) {
struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
if (!skb2) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
if (unlikely(head_room > skb_headroom(skb))) {
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOBUFS;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
}
if (opt) {
......@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
......@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, (struct sock *)sk, skb, NULL, dst->dev,
net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
skb->dev = dst->dev;
skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment