Commit c1e9e01d authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next:

1) Add UDP tunnel support for ICMP errors in IPVS.

Julian Anastasov says:

This patchset is a followup to the commit that adds UDP/GUE tunnel:
"ipvs: allow tunneling with gue encapsulation".

What we do is to put tunnel real servers in hash table (patch 1),
add function to lookup tunnels (patch 2) and use it to strip the
embedded tunnel headers from ICMP errors (patch 3).

2) Extend xt_owner to match for supplementary groups, from
   Lukasz Pawelczyk.

3) Remove unused oif field in flow_offload_tuple object, from
   Taehee Yoo.

4) Release basechain counters from workqueue to skip synchronize_rcu()
   call. From Florian Westphal.

5) Replace skb_make_writable() by skb_ensure_writable(). Patchset
   from Florian Westphal.

6) Checksum support for gue encapsulation in IPVS, from Jacky Hu.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0462eaac 29930e31
...@@ -336,11 +336,6 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, ...@@ -336,11 +336,6 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
char __user *opt, int *len); char __user *opt, int *len);
#endif #endif
/* Call this before modifying an existing packet: ensures it is
modifiable and linear to the point you care about (writable_len).
Returns true or false. */
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
struct flowi; struct flowi;
struct nf_queue_entry; struct nf_queue_entry;
......
...@@ -603,6 +603,7 @@ struct ip_vs_dest_user_kern { ...@@ -603,6 +603,7 @@ struct ip_vs_dest_user_kern {
u16 tun_type; /* tunnel type */ u16 tun_type; /* tunnel type */
__be16 tun_port; /* tunnel port */ __be16 tun_port; /* tunnel port */
u16 tun_flags; /* tunnel flags */
}; };
...@@ -665,6 +666,7 @@ struct ip_vs_dest { ...@@ -665,6 +666,7 @@ struct ip_vs_dest {
atomic_t last_weight; /* server latest weight */ atomic_t last_weight; /* server latest weight */
__u16 tun_type; /* tunnel type */ __u16 tun_type; /* tunnel type */
__be16 tun_port; /* tunnel port */ __be16 tun_port; /* tunnel port */
__u16 tun_flags; /* tunnel flags */
refcount_t refcnt; /* reference counter */ refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */ struct ip_vs_stats stats; /* statistics */
...@@ -1404,6 +1406,9 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, ...@@ -1404,6 +1406,9 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
struct ip_vs_dest * struct ip_vs_dest *
ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport); const union nf_inet_addr *daddr, __be16 dport);
struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
const union nf_inet_addr *daddr,
__be16 tun_port);
int ip_vs_use_count_inc(void); int ip_vs_use_count_inc(void);
void ip_vs_use_count_dec(void); void ip_vs_use_count_dec(void);
...@@ -1497,6 +1502,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) ...@@ -1497,6 +1502,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
#endif #endif
#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
IP_VS_CONN_F_FWD_MASK)
/* ip_vs_fwd_tag returns the forwarding tag of the connection */ /* ip_vs_fwd_tag returns the forwarding tag of the connection */
#define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK)
......
...@@ -53,8 +53,6 @@ struct flow_offload_tuple { ...@@ -53,8 +53,6 @@ struct flow_offload_tuple {
u8 l4proto; u8 l4proto;
u8 dir; u8 dir;
int oifidx;
u16 mtu; u16 mtu;
struct dst_entry *dst_cache; struct dst_entry *dst_cache;
......
...@@ -131,6 +131,11 @@ enum { ...@@ -131,6 +131,11 @@ enum {
IP_VS_CONN_F_TUNNEL_TYPE_MAX, IP_VS_CONN_F_TUNNEL_TYPE_MAX,
}; };
/* Tunnel encapsulation flags */
#define IP_VS_TUNNEL_ENCAP_FLAG_NOCSUM (0)
#define IP_VS_TUNNEL_ENCAP_FLAG_CSUM (1 << 0)
#define IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM (1 << 1)
/* /*
* The struct ip_vs_service_user and struct ip_vs_dest_user are * The struct ip_vs_service_user and struct ip_vs_dest_user are
* used to set IPVS rules through setsockopt. * used to set IPVS rules through setsockopt.
...@@ -403,6 +408,8 @@ enum { ...@@ -403,6 +408,8 @@ enum {
IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */ IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */
IPVS_DEST_ATTR_TUN_FLAGS, /* tunnel flags */
__IPVS_DEST_ATTR_MAX, __IPVS_DEST_ATTR_MAX,
}; };
......
...@@ -8,6 +8,7 @@ enum { ...@@ -8,6 +8,7 @@ enum {
XT_OWNER_UID = 1 << 0, XT_OWNER_UID = 1 << 0,
XT_OWNER_GID = 1 << 1, XT_OWNER_GID = 1 << 1,
XT_OWNER_SOCKET = 1 << 2, XT_OWNER_SOCKET = 1 << 2,
XT_OWNER_SUPPL_GROUPS = 1 << 3,
}; };
struct xt_owner_match_info { struct xt_owner_match_info {
......
...@@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct ebt_nat_info *info = par->targinfo; const struct ebt_nat_info *info = par->targinfo;
struct net_device *dev; struct net_device *dev;
if (!skb_make_writable(skb, 0)) if (skb_ensure_writable(skb, ETH_ALEN))
return EBT_DROP; return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);
......
...@@ -21,7 +21,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -21,7 +21,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
{ {
const struct ebt_redirect_info *info = par->targinfo; const struct ebt_redirect_info *info = par->targinfo;
if (!skb_make_writable(skb, 0)) if (skb_ensure_writable(skb, ETH_ALEN))
return EBT_DROP; return EBT_DROP;
if (xt_hooknum(par) != NF_BR_BROUTING) if (xt_hooknum(par) != NF_BR_BROUTING)
......
...@@ -22,7 +22,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -22,7 +22,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{ {
const struct ebt_nat_info *info = par->targinfo; const struct ebt_nat_info *info = par->targinfo;
if (!skb_make_writable(skb, 0)) if (skb_ensure_writable(skb, ETH_ALEN * 2))
return EBT_DROP; return EBT_DROP;
ether_addr_copy(eth_hdr(skb)->h_source, info->mac); ether_addr_copy(eth_hdr(skb)->h_source, info->mac);
......
...@@ -17,7 +17,7 @@ target(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -17,7 +17,7 @@ target(struct sk_buff *skb, const struct xt_action_param *par)
unsigned char *arpptr; unsigned char *arpptr;
int pln, hln; int pln, hln;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return NF_DROP; return NF_DROP;
arp = arp_hdr(skb); arp = arp_hdr(skb);
......
...@@ -32,7 +32,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) ...@@ -32,7 +32,7 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos; __u8 oldtos;
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false; return false;
iph = ip_hdr(skb); iph = ip_hdr(skb);
oldtos = iph->tos; oldtos = iph->tos;
...@@ -61,7 +61,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) ...@@ -61,7 +61,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
tcph->cwr == einfo->proto.tcp.cwr)) tcph->cwr == einfo->proto.tcp.cwr))
return true; return true;
if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) if (skb_ensure_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return false; return false;
tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
......
...@@ -59,7 +59,7 @@ static int set_addr(struct sk_buff *skb, unsigned int protoff, ...@@ -59,7 +59,7 @@ static int set_addr(struct sk_buff *skb, unsigned int protoff,
net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
return -1; return -1;
} }
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy /* nf_nat_mangle_udp_packet uses skb_ensure_writable() to copy
* or pull everything in a linear buffer, so we can safely * or pull everything in a linear buffer, so we can safely
* use the skb pointers now */ * use the skb pointers now */
*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
......
...@@ -186,7 +186,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, ...@@ -186,7 +186,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP; return NF_DROP;
} }
if (!skb_make_writable(skb, skb->len)) { if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP; return NF_DROP;
} }
......
...@@ -536,28 +536,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, ...@@ -536,28 +536,6 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
} }
EXPORT_SYMBOL(nf_hook_slow); EXPORT_SYMBOL(nf_hook_slow);
/* Make the first writable_len bytes of an skb safe to modify in place:
 * ensures that span is linear and not shared with any clone.
 *
 * Returns 1 when the skb is writable up to writable_len, 0 on failure
 * (request longer than the packet, or the tail pull failed).
 *
 * NOTE(review): this is the legacy helper this commit removes in favour
 * of skb_ensure_writable(), which returns 0 / -errno instead of a
 * boolean — hence every converted caller flips the condition sense.
 */
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
	/* Cannot make more bytes writable than the packet holds. */
	if (writable_len > skb->len)
		return 0;

	/* Not exclusive use of packet? Must copy. */
	if (!skb_cloned(skb)) {
		/* Unshared: writable if already linear up to the request. */
		if (writable_len <= skb_headlen(skb))
			return 1;
	} else if (skb_clone_writable(skb, writable_len))
		return 1;

	/* Pull only the bytes beyond the current linear head area. */
	if (writable_len <= skb_headlen(skb))
		writable_len = 0;
	else
		writable_len -= skb_headlen(skb);

	/* Collapse pull result to 0/1 for the boolean contract. */
	return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
/* This needs to be compiled in any case to avoid dependencies between the /* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack. * nfnetlink_queue code and nf_conntrack.
*/ */
......
...@@ -363,7 +363,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -363,7 +363,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th; struct tcphdr *th;
__u32 seq; __u32 seq;
if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0; return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
...@@ -440,7 +440,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, ...@@ -440,7 +440,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th; struct tcphdr *th;
__u32 seq; __u32 seq;
if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0; return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <net/tcp.h> #include <net/tcp.h>
#include <net/udp.h> #include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */ #include <net/icmp.h> /* for icmp_send */
#include <net/gue.h>
#include <net/route.h> #include <net/route.h>
#include <net/ip6_checksum.h> #include <net/ip6_checksum.h>
#include <net/netns/generic.h> /* net_generic() */ #include <net/netns/generic.h> /* net_generic() */
...@@ -897,7 +898,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ...@@ -897,7 +898,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol) IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16); offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset)) if (skb_ensure_writable(skb, offset))
goto out; goto out;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
...@@ -1287,7 +1288,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, ...@@ -1287,7 +1288,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
if (!skb_make_writable(skb, iph->len)) if (skb_ensure_writable(skb, iph->len))
goto drop; goto drop;
/* mangle the packet */ /* mangle the packet */
...@@ -1579,6 +1580,41 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ...@@ -1579,6 +1580,41 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 1; return 1;
} }
/* Check whether the embedded packet of an ICMP error is our UDP tunnel
 * encapsulation to a known real server; return the total tunnel header
 * length (UDP header + encap headers) to strip, or 0 when it is not a
 * recognized tunnel.  On success *proto is set to the inner protocol.
 * Uses ip_vs_find_tunnel(), so presumably must run under RCU read lock
 * — TODO confirm against callers.
 */
static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
			  unsigned int offset, __u16 af,
			  const union nf_inet_addr *daddr, __u8 *proto)
{
	struct udphdr _udph, *udph;
	struct ip_vs_dest *dest;

	/* UDP header of the embedded (original) packet */
	udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
	if (!udph)
		goto unk;
	offset += sizeof(struct udphdr);

	/* Only decapsulate errors for tunnels to known real servers,
	 * matched by destination address and tunnel (UDP dest) port.
	 */
	dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
	if (!dest)
		goto unk;
	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		struct guehdr _gueh, *gueh;

		gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
		if (!gueh)
			goto unk;
		/* Only GUE version 0 data messages are handled */
		if (gueh->control != 0 || gueh->version != 0)
			goto unk;
		/* Later we can support also IPPROTO_IPV6 */
		if (gueh->proto_ctype != IPPROTO_IPIP)
			goto unk;
		*proto = gueh->proto_ctype;
		/* UDP + GUE base header + GUE options (hlen in 32-bit words) */
		return sizeof(struct udphdr) + sizeof(struct guehdr) +
		       (gueh->hlen << 2);
	}

unk:
	/* 0 = not a recognized tunnel; caller leaves the packet alone */
	return 0;
}
/* /*
* Handle ICMP messages in the outside-to-inside direction (incoming). * Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections, * Find any that might be relevant, check against existing connections,
...@@ -1598,6 +1634,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, ...@@ -1598,6 +1634,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
struct ip_vs_proto_data *pd; struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict; unsigned int offset, offset2, ihl, verdict;
bool ipip, new_cp = false; bool ipip, new_cp = false;
union nf_inet_addr *raddr;
*related = 1; *related = 1;
...@@ -1636,20 +1673,51 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, ...@@ -1636,20 +1673,51 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL) if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */ return NF_ACCEPT; /* The packet looks wrong, ignore */
raddr = (union nf_inet_addr *)&cih->daddr;
/* Special case for errors for IPIP packets */ /* Special case for errors for IPIP packets */
ipip = false; ipip = false;
if (cih->protocol == IPPROTO_IPIP) { if (cih->protocol == IPPROTO_IPIP) {
struct ip_vs_dest *dest;
if (unlikely(cih->frag_off & htons(IP_OFFSET))) if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT; return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */ /* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT; return NF_ACCEPT;
dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
/* Only for known tunnel */
if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
return NF_ACCEPT;
offset += cih->ihl * 4; offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL) if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */ return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true; ipip = true;
} else if (cih->protocol == IPPROTO_UDP && /* Can be UDP encap */
/* Error for our tunnel must arrive at LOCAL_IN */
(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
__u8 iproto;
int ulen;
/* Non-first fragment has no UDP header */
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
offset2 = offset + cih->ihl * 4;
ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, raddr,
&iproto);
if (ulen > 0) {
/* Skip IP and UDP tunnel headers */
offset = offset2 + ulen;
/* Now we should be at the original IP header */
cih = skb_header_pointer(skb, offset, sizeof(_ciph),
&_ciph);
if (cih && cih->version == 4 && cih->ihl >= 5 &&
iproto == IPPROTO_IPIP)
ipip = true;
else
return NF_ACCEPT;
}
} }
pd = ip_vs_proto_data_get(ipvs, cih->protocol); pd = ip_vs_proto_data_get(ipvs, cih->protocol);
......
...@@ -515,15 +515,36 @@ static inline unsigned int ip_vs_rs_hashkey(int af, ...@@ -515,15 +515,36 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{ {
unsigned int hash; unsigned int hash;
__be16 port;
if (dest->in_rs_table) if (dest->in_rs_table)
return; return;
switch (IP_VS_DFWD_METHOD(dest)) {
case IP_VS_CONN_F_MASQ:
port = dest->port;
break;
case IP_VS_CONN_F_TUNNEL:
switch (dest->tun_type) {
case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
port = dest->tun_port;
break;
case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
port = 0;
break;
default:
return;
}
break;
default:
return;
}
/* /*
* Hash by proto,addr,port, * Hash by proto,addr,port,
* which are the parameters of the real service. * which are the parameters of the real service.
*/ */
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
dest->in_rs_table = 1; dest->in_rs_table = 1;
...@@ -555,7 +576,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, ...@@ -555,7 +576,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
if (dest->port == dport && if (dest->port == dport &&
dest->af == af && dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) && ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) { (dest->protocol == protocol || dest->vfwmark) &&
IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
/* HIT */ /* HIT */
return true; return true;
} }
...@@ -585,7 +607,37 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, ...@@ -585,7 +607,37 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
if (dest->port == dport && if (dest->port == dport &&
dest->af == af && dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) && ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) { (dest->protocol == protocol || dest->vfwmark) &&
IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
/* HIT */
return dest;
}
}
return NULL;
}
/* Find real service record by <af,addr,tun_port>.
* In case of multiple records with the same <af,addr,tun_port>, only
* the first found record is returned.
*
* To be called under RCU lock.
*/
struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
const union nf_inet_addr *daddr,
__be16 tun_port)
{
struct ip_vs_dest *dest;
unsigned int hash;
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, tun_port);
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->tun_port == tun_port &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
/* HIT */ /* HIT */
return dest; return dest;
} }
...@@ -831,24 +883,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, ...@@ -831,24 +883,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE; conn_flags |= IP_VS_CONN_F_INACTIVE;
/* Need to rehash? */
if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
IP_VS_DFWD_METHOD(dest) ||
udest->tun_type != dest->tun_type ||
udest->tun_port != dest->tun_port)
ip_vs_rs_unhash(dest);
/* set the tunnel info */ /* set the tunnel info */
dest->tun_type = udest->tun_type; dest->tun_type = udest->tun_type;
dest->tun_port = udest->tun_port; dest->tun_port = udest->tun_port;
dest->tun_flags = udest->tun_flags;
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT; conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else { } else {
/*
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
ip_vs_rs_hash(ipvs, dest);
/* FTP-NAT requires conntrack for mangling */ /* FTP-NAT requires conntrack for mangling */
if (svc->port == FTPPORT) if (svc->port == FTPPORT)
ip_vs_register_conntrack(svc); ip_vs_register_conntrack(svc);
} }
atomic_set(&dest->conn_flags, conn_flags); atomic_set(&dest->conn_flags, conn_flags);
/* Put the real service in rs_table if not present. */
ip_vs_rs_hash(ipvs, dest);
/* bind the service */ /* bind the service */
old_svc = rcu_dereference_protected(dest->svc, 1); old_svc = rcu_dereference_protected(dest->svc, 1);
...@@ -2911,6 +2968,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { ...@@ -2911,6 +2968,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
}; };
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
...@@ -3217,6 +3275,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) ...@@ -3217,6 +3275,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
dest->tun_type) || dest->tun_type) ||
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
dest->tun_port) || dest->tun_port) ||
nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
dest->tun_flags) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
...@@ -3337,7 +3397,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, ...@@ -3337,7 +3397,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* If a full entry was requested, check for the additional fields */ /* If a full entry was requested, check for the additional fields */
if (full_entry) { if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
*nla_l_thresh, *nla_tun_type, *nla_tun_port; *nla_l_thresh, *nla_tun_type, *nla_tun_port,
*nla_tun_flags;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
...@@ -3345,6 +3406,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, ...@@ -3345,6 +3406,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL; return -EINVAL;
...@@ -3360,6 +3422,9 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, ...@@ -3360,6 +3422,9 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
if (nla_tun_port) if (nla_tun_port)
udest->tun_port = nla_get_be16(nla_tun_port); udest->tun_port = nla_get_be16(nla_tun_port);
if (nla_tun_flags)
udest->tun_flags = nla_get_u16(nla_tun_flags);
} }
return 0; return 0;
......
...@@ -273,7 +273,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, ...@@ -273,7 +273,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1; return 1;
/* Linear packets are much easier to deal with. */ /* Linear packets are much easier to deal with. */
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return 0; return 0;
if (cp->app_data == (void *) IP_VS_FTP_PASV) { if (cp->app_data == (void *) IP_VS_FTP_PASV) {
...@@ -439,7 +439,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ...@@ -439,7 +439,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1; return 1;
/* Linear packets are much easier to deal with. */ /* Linear packets are much easier to deal with. */
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return 0; return 0;
data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh); data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
......
...@@ -101,7 +101,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -101,7 +101,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif #endif
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
...@@ -148,7 +148,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -148,7 +148,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif #endif
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
......
...@@ -163,7 +163,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -163,7 +163,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff; oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
...@@ -241,7 +241,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -241,7 +241,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff; oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
......
...@@ -153,7 +153,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -153,7 +153,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff; oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph))) if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
...@@ -236,7 +236,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -236,7 +236,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff; oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */ /* csum_check requires unshared skb */
if (!skb_make_writable(skb, udphoff+sizeof(*udph))) if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0; return 0;
if (unlikely(cp->app != NULL)) { if (unlikely(cp->app != NULL)) {
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ip6_route.h> #include <net/ip6_route.h>
#include <net/ip_tunnels.h> #include <net/ip_tunnels.h>
#include <net/ip6_checksum.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <linux/netfilter.h> #include <linux/netfilter.h>
...@@ -279,7 +280,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, ...@@ -279,7 +280,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
} }
/* don't propagate ttl change to cloned packets */ /* don't propagate ttl change to cloned packets */
if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return false; return false;
ipv6_hdr(skb)->hop_limit--; ipv6_hdr(skb)->hop_limit--;
...@@ -294,7 +295,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, ...@@ -294,7 +295,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
} }
/* don't propagate ttl change to cloned packets */ /* don't propagate ttl change to cloned packets */
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false; return false;
/* Decrease ttl */ /* Decrease ttl */
...@@ -385,8 +386,13 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ...@@ -385,8 +386,13 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (!dest) if (!dest)
goto err_put; goto err_put;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if ((dest->tun_flags &
IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
}
if (mtu < 68) { if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put; goto err_put;
...@@ -540,8 +546,13 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ...@@ -540,8 +546,13 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (!dest) if (!dest)
goto err_put; goto err_put;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr); mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if ((dest->tun_flags &
IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL)
mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
}
if (mtu < IPV6_MIN_MTU) { if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU); IPV6_MIN_MTU);
...@@ -796,7 +807,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -796,7 +807,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
goto tx_error; goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
...@@ -885,7 +896,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -885,7 +896,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
goto tx_error; goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
...@@ -1006,17 +1017,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb, ...@@ -1006,17 +1017,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
__be16 sport = udp_flow_src_port(net, skb, 0, 0, false); __be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
struct udphdr *udph; /* Our new UDP header */ struct udphdr *udph; /* Our new UDP header */
struct guehdr *gueh; /* Our new GUE header */ struct guehdr *gueh; /* Our new GUE header */
size_t hdrlen, optlen = 0;
void *data;
bool need_priv = false;
if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
need_priv = true;
}
skb_push(skb, sizeof(struct guehdr)); hdrlen = sizeof(struct guehdr) + optlen;
skb_push(skb, hdrlen);
gueh = (struct guehdr *)skb->data; gueh = (struct guehdr *)skb->data;
gueh->control = 0; gueh->control = 0;
gueh->version = 0; gueh->version = 0;
gueh->hlen = 0; gueh->hlen = optlen >> 2;
gueh->flags = 0; gueh->flags = 0;
gueh->proto_ctype = *next_protocol; gueh->proto_ctype = *next_protocol;
data = &gueh[1];
if (need_priv) {
__be32 *flags = data;
u16 csum_start = skb_checksum_start_offset(skb);
__be16 *pd;
gueh->flags |= GUE_FLAG_PRIV;
*flags = 0;
data += GUE_LEN_PRIV;
if (csum_start < hdrlen)
return -EINVAL;
csum_start -= hdrlen;
pd = data;
pd[0] = htons(csum_start);
pd[1] = htons(csum_start + skb->csum_offset);
if (!skb_is_gso(skb)) {
skb->ip_summed = CHECKSUM_NONE;
skb->encapsulation = 0;
}
*flags |= GUE_PFLAG_REMCSUM;
data += GUE_PLEN_REMCSUM;
}
skb_push(skb, sizeof(struct udphdr)); skb_push(skb, sizeof(struct udphdr));
skb_reset_transport_header(skb); skb_reset_transport_header(skb);
...@@ -1070,6 +1120,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1070,6 +1120,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */ unsigned int max_headroom; /* The extra header space needed */
int ret, local; int ret, local;
int tun_type, gso_type; int tun_type, gso_type;
int tun_flags;
EnterFunction(10); EnterFunction(10);
...@@ -1092,9 +1143,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1092,9 +1143,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
tun_type = cp->dest->tun_type; tun_type = cp->dest->tun_type;
tun_flags = cp->dest->tun_flags;
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); size_t gue_hdrlen, gue_optlen = 0;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
}
gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
}
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL; dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
...@@ -1105,8 +1166,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1105,8 +1166,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error; goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET, cp->af); gso_type = __tun_gso_type_mask(AF_INET, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
(tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
else
gso_type |= SKB_GSO_UDP_TUNNEL; gso_type |= SKB_GSO_UDP_TUNNEL;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
gso_type |= SKB_GSO_TUNNEL_REMCSUM;
}
}
if (iptunnel_handle_offloads(skb, gso_type)) if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error; goto tx_error;
...@@ -1115,8 +1185,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1115,8 +1185,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol); skb_set_inner_ipproto(skb, next_protocol);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
ipvs_gue_encap(net, skb, cp, &next_protocol); bool check = false;
if (ipvs_gue_encap(net, skb, cp, &next_protocol))
goto tx_error;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
(tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
check = true;
udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
}
skb_push(skb, sizeof(struct iphdr)); skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb); skb_reset_network_header(skb);
...@@ -1174,6 +1255,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1174,6 +1255,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */ unsigned int max_headroom; /* The extra header space needed */
int ret, local; int ret, local;
int tun_type, gso_type; int tun_type, gso_type;
int tun_flags;
EnterFunction(10); EnterFunction(10);
...@@ -1197,9 +1279,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1197,9 +1279,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
tun_type = cp->dest->tun_type; tun_type = cp->dest->tun_type;
tun_flags = cp->dest->tun_flags;
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr); size_t gue_hdrlen, gue_optlen = 0;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
}
gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
max_headroom += sizeof(struct udphdr) + gue_hdrlen;
}
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len, &next_protocol, &payload_len,
...@@ -1208,8 +1300,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1208,8 +1300,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error; goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET6, cp->af); gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
(tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
else
gso_type |= SKB_GSO_UDP_TUNNEL; gso_type |= SKB_GSO_UDP_TUNNEL;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
gso_type |= SKB_GSO_TUNNEL_REMCSUM;
}
}
if (iptunnel_handle_offloads(skb, gso_type)) if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error; goto tx_error;
...@@ -1218,8 +1319,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1218,8 +1319,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol); skb_set_inner_ipproto(skb, next_protocol);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
ipvs_gue_encap(net, skb, cp, &next_protocol); bool check = false;
if (ipvs_gue_encap(net, skb, cp, &next_protocol))
goto tx_error;
if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
(tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
check = true;
udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
}
skb_push(skb, sizeof(struct ipv6hdr)); skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb); skb_reset_network_header(skb);
...@@ -1404,7 +1515,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1404,7 +1515,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset)) if (skb_ensure_writable(skb, offset))
goto tx_error; goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
...@@ -1493,7 +1604,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ...@@ -1493,7 +1604,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
} }
/* copy-on-write the packet before mangling it */ /* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset)) if (skb_ensure_writable(skb, offset))
goto tx_error; goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len)) if (skb_cow(skb, rt->dst.dev->hard_header_len))
......
...@@ -339,7 +339,7 @@ static bool sctp_error(struct sk_buff *skb, ...@@ -339,7 +339,7 @@ static bool sctp_error(struct sk_buff *skb,
if (state->hook == NF_INET_PRE_ROUTING && if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum && state->net->ct.sysctl_checksum &&
skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed == CHECKSUM_NONE) {
if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) { if (skb_ensure_writable(skb, dataoff + sizeof(*sh))) {
logmsg = "nf_ct_sctp: failed to read header "; logmsg = "nf_ct_sctp: failed to read header ";
goto out_invalid; goto out_invalid;
} }
......
...@@ -126,7 +126,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb, ...@@ -126,7 +126,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr); optoff = protoff + sizeof(struct tcphdr);
optend = protoff + tcph->doff * 4; optend = protoff + tcph->doff * 4;
if (!skb_make_writable(skb, optend)) if (skb_ensure_writable(skb, optend))
return 0; return 0;
tcph = (void *)skb->data + protoff; tcph = (void *)skb->data + protoff;
...@@ -176,7 +176,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, ...@@ -176,7 +176,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
this_way = &seqadj->seq[dir]; this_way = &seqadj->seq[dir];
other_way = &seqadj->seq[!dir]; other_way = &seqadj->seq[!dir];
if (!skb_make_writable(skb, protoff + sizeof(*tcph))) if (skb_ensure_writable(skb, protoff + sizeof(*tcph)))
return 0; return 0;
tcph = (void *)skb->data + protoff; tcph = (void *)skb->data + protoff;
......
...@@ -53,7 +53,6 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ...@@ -53,7 +53,6 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->dst_port = ctt->dst.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port;
ft->iifidx = other_dst->dev->ifindex; ft->iifidx = other_dst->dev->ifindex;
ft->oifidx = dst->dev->ifindex;
ft->dst_cache = dst; ft->dst_cache = dst;
} }
......
...@@ -98,7 +98,7 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, ...@@ -98,7 +98,7 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct tcphdr *tcph; struct tcphdr *tcph;
int oldlen, datalen; int oldlen, datalen;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return false; return false;
if (rep_len > match_len && if (rep_len > match_len &&
...@@ -148,7 +148,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, ...@@ -148,7 +148,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct udphdr *udph; struct udphdr *udph;
int datalen, oldlen; int datalen, oldlen;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return false; return false;
if (rep_len > match_len && if (rep_len > match_len &&
......
...@@ -73,7 +73,7 @@ static bool udp_manip_pkt(struct sk_buff *skb, ...@@ -73,7 +73,7 @@ static bool udp_manip_pkt(struct sk_buff *skb,
struct udphdr *hdr; struct udphdr *hdr;
bool do_csum; bool do_csum;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false; return false;
hdr = (struct udphdr *)(skb->data + hdroff); hdr = (struct udphdr *)(skb->data + hdroff);
...@@ -91,7 +91,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb, ...@@ -91,7 +91,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_UDPLITE #ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr; struct udphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false; return false;
hdr = (struct udphdr *)(skb->data + hdroff); hdr = (struct udphdr *)(skb->data + hdroff);
...@@ -117,7 +117,7 @@ sctp_manip_pkt(struct sk_buff *skb, ...@@ -117,7 +117,7 @@ sctp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(*hdr)) if (skb->len >= hdroff + sizeof(*hdr))
hdrsize = sizeof(*hdr); hdrsize = sizeof(*hdr);
if (!skb_make_writable(skb, hdroff + hdrsize)) if (skb_ensure_writable(skb, hdroff + hdrsize))
return false; return false;
hdr = (struct sctphdr *)(skb->data + hdroff); hdr = (struct sctphdr *)(skb->data + hdroff);
...@@ -158,7 +158,7 @@ tcp_manip_pkt(struct sk_buff *skb, ...@@ -158,7 +158,7 @@ tcp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct tcphdr)) if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr); hdrsize = sizeof(struct tcphdr);
if (!skb_make_writable(skb, hdroff + hdrsize)) if (skb_ensure_writable(skb, hdroff + hdrsize))
return false; return false;
hdr = (struct tcphdr *)(skb->data + hdroff); hdr = (struct tcphdr *)(skb->data + hdroff);
...@@ -198,7 +198,7 @@ dccp_manip_pkt(struct sk_buff *skb, ...@@ -198,7 +198,7 @@ dccp_manip_pkt(struct sk_buff *skb,
if (skb->len >= hdroff + sizeof(struct dccp_hdr)) if (skb->len >= hdroff + sizeof(struct dccp_hdr))
hdrsize = sizeof(struct dccp_hdr); hdrsize = sizeof(struct dccp_hdr);
if (!skb_make_writable(skb, hdroff + hdrsize)) if (skb_ensure_writable(skb, hdroff + hdrsize))
return false; return false;
hdr = (struct dccp_hdr *)(skb->data + hdroff); hdr = (struct dccp_hdr *)(skb->data + hdroff);
...@@ -232,7 +232,7 @@ icmp_manip_pkt(struct sk_buff *skb, ...@@ -232,7 +232,7 @@ icmp_manip_pkt(struct sk_buff *skb,
{ {
struct icmphdr *hdr; struct icmphdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false; return false;
hdr = (struct icmphdr *)(skb->data + hdroff); hdr = (struct icmphdr *)(skb->data + hdroff);
...@@ -250,7 +250,7 @@ icmpv6_manip_pkt(struct sk_buff *skb, ...@@ -250,7 +250,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
{ {
struct icmp6hdr *hdr; struct icmp6hdr *hdr;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false; return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff); hdr = (struct icmp6hdr *)(skb->data + hdroff);
...@@ -278,7 +278,7 @@ gre_manip_pkt(struct sk_buff *skb, ...@@ -278,7 +278,7 @@ gre_manip_pkt(struct sk_buff *skb,
/* pgreh includes two optional 32bit fields which are not required /* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */ * to be there. That's where the magic '8' comes from */
if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) if (skb_ensure_writable(skb, hdroff + sizeof(*pgreh) - 8))
return false; return false;
greh = (void *)skb->data + hdroff; greh = (void *)skb->data + hdroff;
...@@ -350,7 +350,7 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, ...@@ -350,7 +350,7 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
struct iphdr *iph; struct iphdr *iph;
unsigned int hdroff; unsigned int hdroff;
if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) if (skb_ensure_writable(skb, iphdroff + sizeof(*iph)))
return false; return false;
iph = (void *)skb->data + iphdroff; iph = (void *)skb->data + iphdroff;
...@@ -381,7 +381,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, ...@@ -381,7 +381,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
int hdroff; int hdroff;
u8 nexthdr; u8 nexthdr;
if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) if (skb_ensure_writable(skb, iphdroff + sizeof(*ipv6h)))
return false; return false;
ipv6h = (void *)skb->data + iphdroff; ipv6h = (void *)skb->data + iphdroff;
...@@ -565,7 +565,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, ...@@ -565,7 +565,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0; return 0;
if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
return 0; return 0;
...@@ -787,7 +787,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, ...@@ -787,7 +787,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) if (skb_ensure_writable(skb, hdrlen + sizeof(*inside)))
return 0; return 0;
if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
return 0; return 0;
......
...@@ -285,7 +285,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, ...@@ -285,7 +285,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) { if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
struct udphdr *uh; struct udphdr *uh;
if (!skb_make_writable(skb, skb->len)) { if (skb_ensure_writable(skb, skb->len)) {
nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_helper_log(skb, ct, "cannot mangle packet");
return NF_DROP; return NF_DROP;
} }
......
...@@ -196,7 +196,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, ...@@ -196,7 +196,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
optoff = protoff + sizeof(struct tcphdr); optoff = protoff + sizeof(struct tcphdr);
optend = protoff + th->doff * 4; optend = protoff + th->doff * 4;
if (!skb_make_writable(skb, optend)) if (skb_ensure_writable(skb, optend))
return 0; return 0;
while (optoff < optend) { while (optoff < optend) {
......
...@@ -1449,25 +1449,18 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) ...@@ -1449,25 +1449,18 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
return newstats; return newstats;
} }
static void nft_chain_stats_replace(struct net *net, static void nft_chain_stats_replace(struct nft_trans *trans)
struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{ {
struct nft_stats __percpu *oldstats; struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
if (newstats == NULL) if (!nft_trans_chain_stats(trans))
return; return;
if (rcu_access_pointer(chain->stats)) { rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
oldstats = rcu_dereference_protected(chain->stats, lockdep_commit_lock_is_held(trans->ctx.net));
lockdep_commit_lock_is_held(net));
rcu_assign_pointer(chain->stats, newstats); if (!nft_trans_chain_stats(trans))
synchronize_rcu();
free_percpu(oldstats);
} else {
rcu_assign_pointer(chain->stats, newstats);
static_branch_inc(&nft_counters_enabled); static_branch_inc(&nft_counters_enabled);
}
} }
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
...@@ -6362,9 +6355,9 @@ static void nft_chain_commit_update(struct nft_trans *trans) ...@@ -6362,9 +6355,9 @@ static void nft_chain_commit_update(struct nft_trans *trans)
if (!nft_is_base_chain(trans->ctx.chain)) if (!nft_is_base_chain(trans->ctx.chain))
return; return;
nft_chain_stats_replace(trans);
basechain = nft_base_chain(trans->ctx.chain); basechain = nft_base_chain(trans->ctx.chain);
nft_chain_stats_replace(trans->ctx.net, basechain,
nft_trans_chain_stats(trans));
switch (nft_trans_chain_policy(trans)) { switch (nft_trans_chain_policy(trans)) {
case NF_DROP: case NF_DROP:
...@@ -6381,6 +6374,7 @@ static void nft_commit_release(struct nft_trans *trans) ...@@ -6381,6 +6374,7 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_table_destroy(&trans->ctx); nf_tables_table_destroy(&trans->ctx);
break; break;
case NFT_MSG_NEWCHAIN: case NFT_MSG_NEWCHAIN:
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans)); kfree(nft_trans_chain_name(trans));
break; break;
case NFT_MSG_DELCHAIN: case NFT_MSG_DELCHAIN:
......
...@@ -863,7 +863,7 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) ...@@ -863,7 +863,7 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
} }
skb_put(e->skb, diff); skb_put(e->skb, diff);
} }
if (!skb_make_writable(e->skb, data_len)) if (skb_ensure_writable(e->skb, data_len))
return -ENOMEM; return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len); skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE; e->skb->ip_summed = CHECKSUM_NONE;
......
...@@ -156,7 +156,8 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, ...@@ -156,7 +156,8 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl) if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
return; return;
if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len)) if (skb_ensure_writable(pkt->skb,
pkt->xt.thoff + i + priv->len))
return; return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
......
...@@ -243,7 +243,7 @@ static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt, ...@@ -243,7 +243,7 @@ static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt,
tsum)); tsum));
} }
if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) || if (skb_ensure_writable(skb, l4csum_offset + sizeof(sum)) ||
skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0)
return -1; return -1;
...@@ -259,7 +259,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src, ...@@ -259,7 +259,7 @@ static int nft_payload_csum_inet(struct sk_buff *skb, const u32 *src,
return -1; return -1;
nft_csum_replace(&sum, fsum, tsum); nft_csum_replace(&sum, fsum, tsum);
if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || if (skb_ensure_writable(skb, csum_offset + sizeof(sum)) ||
skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
return -1; return -1;
...@@ -312,7 +312,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, ...@@ -312,7 +312,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
goto err; goto err;
} }
if (!skb_make_writable(skb, max(offset + priv->len, 0)) || if (skb_ensure_writable(skb, max(offset + priv->len, 0)) ||
skb_store_bits(skb, offset, src, priv->len) < 0) skb_store_bits(skb, offset, src, priv->len) < 0)
goto err; goto err;
......
...@@ -34,7 +34,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -34,7 +34,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) { if (dscp != dinfo->dscp) {
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP; return NF_DROP;
ipv4_change_dsfield(ip_hdr(skb), ipv4_change_dsfield(ip_hdr(skb),
...@@ -52,7 +52,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -52,7 +52,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) { if (dscp != dinfo->dscp) {
if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP; return NF_DROP;
ipv6_change_dsfield(ipv6_hdr(skb), ipv6_change_dsfield(ipv6_hdr(skb),
...@@ -82,7 +82,7 @@ tos_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -82,7 +82,7 @@ tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value; nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) { if (orig != nv) {
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP; return NF_DROP;
iph = ip_hdr(skb); iph = ip_hdr(skb);
ipv4_change_dsfield(iph, 0, nv); ipv4_change_dsfield(iph, 0, nv);
...@@ -102,7 +102,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -102,7 +102,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
nv = (orig & ~info->tos_mask) ^ info->tos_value; nv = (orig & ~info->tos_mask) ^ info->tos_value;
if (orig != nv) { if (orig != nv) {
if (!skb_make_writable(skb, sizeof(struct iphdr))) if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return NF_DROP; return NF_DROP;
iph = ipv6_hdr(skb); iph = ipv6_hdr(skb);
ipv6_change_dsfield(iph, 0, nv); ipv6_change_dsfield(iph, 0, nv);
......
...@@ -32,7 +32,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -32,7 +32,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
const struct ipt_TTL_info *info = par->targinfo; const struct ipt_TTL_info *info = par->targinfo;
int new_ttl; int new_ttl;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, sizeof(*iph)))
return NF_DROP; return NF_DROP;
iph = ip_hdr(skb); iph = ip_hdr(skb);
...@@ -72,7 +72,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -72,7 +72,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
const struct ip6t_HL_info *info = par->targinfo; const struct ip6t_HL_info *info = par->targinfo;
int new_hl; int new_hl;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, sizeof(*ip6h)))
return NF_DROP; return NF_DROP;
ip6h = ipv6_hdr(skb); ip6h = ipv6_hdr(skb);
......
...@@ -89,7 +89,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, ...@@ -89,7 +89,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
if (par->fragoff != 0) if (par->fragoff != 0)
return 0; return 0;
if (!skb_make_writable(skb, skb->len)) if (skb_ensure_writable(skb, skb->len))
return -1; return -1;
len = skb->len - tcphoff; len = skb->len - tcphoff;
......
...@@ -31,33 +31,33 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) ...@@ -31,33 +31,33 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset)
static unsigned int static unsigned int
tcpoptstrip_mangle_packet(struct sk_buff *skb, tcpoptstrip_mangle_packet(struct sk_buff *skb,
const struct xt_action_param *par, const struct xt_action_param *par,
unsigned int tcphoff, unsigned int minlen) unsigned int tcphoff)
{ {
const struct xt_tcpoptstrip_target_info *info = par->targinfo; const struct xt_tcpoptstrip_target_info *info = par->targinfo;
struct tcphdr *tcph, _th;
unsigned int optl, i, j; unsigned int optl, i, j;
struct tcphdr *tcph;
u_int16_t n, o; u_int16_t n, o;
u_int8_t *opt; u_int8_t *opt;
int len, tcp_hdrlen; int tcp_hdrlen;
/* This is a fragment, no TCP header is available */ /* This is a fragment, no TCP header is available */
if (par->fragoff != 0) if (par->fragoff != 0)
return XT_CONTINUE; return XT_CONTINUE;
if (!skb_make_writable(skb, skb->len)) tcph = skb_header_pointer(skb, tcphoff, sizeof(_th), &_th);
if (!tcph)
return NF_DROP; return NF_DROP;
len = skb->len - tcphoff;
if (len < (int)sizeof(struct tcphdr))
return NF_DROP;
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4; tcp_hdrlen = tcph->doff * 4;
if (tcp_hdrlen < sizeof(struct tcphdr))
return NF_DROP;
if (len < tcp_hdrlen) if (skb_ensure_writable(skb, tcphoff + tcp_hdrlen))
return NF_DROP; return NF_DROP;
opt = (u_int8_t *)tcph; /* must reload tcph, might have been moved */
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
opt = (u8 *)tcph;
/* /*
* Walk through all TCP options - if we find some option to remove, * Walk through all TCP options - if we find some option to remove,
...@@ -91,8 +91,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, ...@@ -91,8 +91,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
static unsigned int static unsigned int
tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{ {
return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb), return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
sizeof(struct iphdr) + sizeof(struct tcphdr));
} }
#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) #if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
...@@ -109,8 +108,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -109,8 +108,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0) if (tcphoff < 0)
return NF_DROP; return NF_DROP;
return tcpoptstrip_mangle_packet(skb, par, tcphoff, return tcpoptstrip_mangle_packet(skb, par, tcphoff);
sizeof(*ipv6h) + sizeof(struct tcphdr));
} }
#endif #endif
......
...@@ -91,11 +91,28 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -91,11 +91,28 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
} }
if (info->match & XT_OWNER_GID) { if (info->match & XT_OWNER_GID) {
unsigned int i, match = false;
kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
if ((gid_gte(filp->f_cred->fsgid, gid_min) && struct group_info *gi = filp->f_cred->group_info;
gid_lte(filp->f_cred->fsgid, gid_max)) ^
!(info->invert & XT_OWNER_GID)) if (gid_gte(filp->f_cred->fsgid, gid_min) &&
gid_lte(filp->f_cred->fsgid, gid_max))
match = true;
if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
for (i = 0; i < gi->ngroups; ++i) {
kgid_t group = gi->gid[i];
if (gid_gte(group, gid_min) &&
gid_lte(group, gid_max)) {
match = true;
break;
}
}
}
if (match ^ !(info->invert & XT_OWNER_GID))
return false; return false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment