Commit 7a27f6ab authored by Pablo Neira Ayuso, committed by David S. Miller

netfilter: flowtable: use dev_fill_forward_path() to obtain egress device

The egress device in the tuple is obtained from the route. Use
dev_fill_forward_path() instead to provide the real egress device for
this flow whenever it is available.
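
For context, dev_fill_forward_path() walks the chain of stacked devices
(bridge, VLAN and so on) below a given device towards the destination and
records each hop in a net_device_path_stack. A minimal sketch of how a
caller might resolve the real egress device, assuming rcu_read_lock() is
held; resolve_real_egress() is a hypothetical helper name, not part of
this patch:

static struct net_device *resolve_real_egress(struct net_device *dev,
					      const u8 *daddr)
{
	struct net_device_path_stack stack;

	/* Fills stack.path[] from the topmost device downwards. */
	if (dev_fill_forward_path(dev, daddr, &stack) < 0)
		return NULL;

	if (!stack.num_paths)
		return NULL;

	/* The last entry is the lowest device in the stack, i.e. the
	 * real egress device.
	 */
	return (struct net_device *)stack.path[stack.num_paths - 1].dev;
}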

The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
ethernet frames. Cache the source and destination hardware addresses so
dev_queue_xmit() can be used to transfer packets.
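
In other words, once the hardware addresses are resolved at flow setup
time, the transmit side no longer needs a routing or neighbour lookup per
packet: it rebuilds the ethernet header from the cached addresses and
hands the skb straight to the egress queue. A minimal sketch of this
idea, assuming RCU read-side protection; xmit_direct() is a hypothetical
name used for illustration (the patch implements this as
nf_flow_queue_xmit(), see below):

static int xmit_direct(struct net *net, struct sk_buff *skb, u32 ifidx,
		       const u8 *h_dest, const u8 *h_source,
		       unsigned short type)
{
	struct net_device *outdev;

	/* Caller holds rcu_read_lock(), so the device cannot go away. */
	outdev = dev_get_by_index_rcu(net, ifidx);
	if (!outdev)
		return -ENODEV;

	skb->dev = outdev;
	/* Prepend an ethernet header built from the cached addresses. */
	dev_hard_header(skb, outdev, type, h_dest, h_source, skb->len);

	return dev_queue_xmit(skb);
}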

FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
dev_fill_forward_path() finds a direct transmit path.

In case of topology updates, if a peer is moved to a different bridge
port, the connection will time out; a reconnect will result in a new
entry with the correct path. Snooping fdb updates would allow for
cleaning up stale flowtable entries.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c63a7cc4
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
 enum flow_offload_xmit_type {
 	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
 	FLOW_OFFLOAD_XMIT_XFRM,
+	FLOW_OFFLOAD_XMIT_DIRECT,
 };
 
 struct flow_offload_tuple {
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
 					xmit_type:2;
 
 	u16				mtu;
-
-	struct dst_entry		*dst_cache;
+	union {
+		struct dst_entry	*dst_cache;
+		struct {
+			u32		ifidx;
+			u8		h_source[ETH_ALEN];
+			u8		h_dest[ETH_ALEN];
+		} out;
+	};
 };
 
 struct flow_offload_tuple_rhash {
@@ -168,6 +175,11 @@ struct nf_flow_route {
 		struct {
 			u32		ifindex;
 		} in;
+		struct {
+			u32		ifindex;
+			u8		h_source[ETH_ALEN];
+			u8		h_dest[ETH_ALEN];
+		} out;
 		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
 	struct dst_entry *dst = route->tuple[dir].dst;
 
-	if (!dst_hold_safe(route->tuple[dir].dst))
-		return -1;
-
 	switch (flow_tuple->l3proto) {
 	case NFPROTO_IPV4:
 		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	}
 
 	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+
+	switch (route->tuple[dir].xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+		       ETH_ALEN);
+		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+		       ETH_ALEN);
+		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+		break;
+	case FLOW_OFFLOAD_XMIT_XFRM:
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		if (!dst_hold_safe(route->tuple[dir].dst))
+			return -1;
+
+		flow_tuple->dst_cache = dst;
+		break;
+	}
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
-	flow_tuple->dst_cache = dst;
 
 	return 0;
 }
 
+static void nft_flow_dst_release(struct flow_offload *flow,
+				 enum flow_offload_tuple_dir dir)
+{
+	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+		dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
 int flow_offload_route_init(struct flow_offload *flow,
 			    const struct nf_flow_route *route)
 {
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_offload *flow,
 	return 0;
 
 err_route_reply:
-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 
 	return err;
 }
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
 
 static void flow_offload_route_release(struct flow_offload *flow)
 {
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
 }
 
 void flow_offload_free(struct flow_offload *flow)
@@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+				       const struct flow_offload_tuple_rhash *tuplehash,
+				       unsigned short type)
+{
+	struct net_device *outdev;
+
+	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+	if (!outdev)
+		return NF_DROP;
+
+	skb->dev = outdev;
+	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+			tuplehash->tuple.out.h_source, skb->len);
+	dev_queue_xmit(skb);
+
+	return NF_STOLEN;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	struct iphdr *iph;
 	__be32 nexthop;
 	u32 hdrsize;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return NF_ACCEPT;
@@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	if (!dst_check(&rt->dst, 0)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
 	if (skb_try_make_writable(skb, thoff + hdrsize))
@@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rtable *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
@@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
@@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
 	u32 hdrsize;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		return NF_ACCEPT;
@@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 			      sizeof(*ip6h)))
 		return NF_ACCEPT;
 
-	if (!dst_check(&rt->dst, 0)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
 	if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
@@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
@@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
@@ -39,12 +39,11 @@ static void nft_default_forward_path(struct nf_flow_route *route,
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 				     const struct dst_entry *dst_cache,
 				     const struct nf_conn *ct,
-				     enum ip_conntrack_dir dir,
+				     enum ip_conntrack_dir dir, u8 *ha,
 				     struct net_device_path_stack *stack)
 {
 	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
 	struct net_device *dev = dst_cache->dev;
-	unsigned char ha[ETH_ALEN];
 	struct neighbour *n;
 	u8 nud_state;
 
@@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 
 struct nft_forward_info {
 	const struct net_device *indev;
+	const struct net_device *outdev;
+	u8 h_source[ETH_ALEN];
+	u8 h_dest[ETH_ALEN];
+	enum flow_offload_xmit_type xmit_type;
 };
 
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
-			      struct nft_forward_info *info)
+			      struct nft_forward_info *info,
+			      unsigned char *ha)
 {
 	const struct net_device_path *path;
 	int i;
 
+	memcpy(info->h_dest, ha, ETH_ALEN);
+
 	for (i = 0; i < stack->num_paths; i++) {
 		path = &stack->path[i];
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
 			info->indev = path->dev;
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 			break;
-		case DEV_PATH_VLAN:
 		case DEV_PATH_BRIDGE:
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+			break;
+		case DEV_PATH_VLAN:
 		default:
 			info->indev = NULL;
 			break;
 		}
 	}
+	if (!info->outdev)
+		info->outdev = info->indev;
 }
 
 static bool nft_flowtable_find_dev(const struct net_device *dev,
@@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 	const struct dst_entry *dst = route->tuple[dir].dst;
 	struct net_device_path_stack stack;
 	struct nft_forward_info info = {};
+	unsigned char ha[ETH_ALEN];
 
-	if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
-		nft_dev_path_info(&stack, &info);
+	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+		nft_dev_path_info(&stack, &info, ha);
 
 	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
 		return;
 
 	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+
+	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+		route->tuple[dir].out.ifindex = info.outdev->ifindex;
+		route->tuple[dir].xmit_type = info.xmit_type;
+	}
 }
 
 static int nft_flow_route(const struct nft_pktinfo *pkt,