Commit b2140e97 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'net-add-helper-support-in-tc-act_ct-for-ovs-offloading'

Xin Long says:

====================
net: add helper support in tc act_ct for ovs offloading

Ilya reported an issue that FTP traffic would be broken when the OVS flow
with ct(commit,alg=ftp) installed in the OVS kernel module, and it was
caused by that TC didn't support the ftp helper offloaded from OVS.

This patchset is to add the helper support in act_ct for OVS offloading
in kernel net/sched.

The 1st and 2nd patches move some common code into nf_conntrack_helper from
openvswitch so that they could be used by net/sched in the 4th patch (Note
there are still some other common code used in both OVS and TC, and I will
extract it in other patches). The 3rd patch extracts another function in
net/sched to make the 4th patch easier to write. The 4th patch adds this
feature in net/sched.

The user space part will be added in another patch, and with it these OVS
flows (FTP over SNAT) can be used to test this feature:

  table=0, in_port=veth1,tcp,tcp_dst=2121,ct_state=-trk \
    actions=ct(table=1, nat), normal
  table=0, in_port=veth2,tcp,ct_state=-trk actions=ct(table=1, nat)
  table=0, in_port=veth1,tcp,ct_state=-trk actions=ct(table=0, nat)
  table=0, in_port=veth1,tcp,ct_state=+trk+rel actions=ct(commit, nat),normal
  table=0, in_port=veth1,tcp,ct_state=+trk+est actions=veth2"

  table=1, in_port=veth1,tcp,tcp_dst=2121,ct_state=+trk+new \
    actions=ct(commit, nat(src=7.7.16.1), alg=ftp),normal"
  table=1, in_port=veth1,tcp,tcp_dst=2121,ct_state=+trk+est actions=veth2"
  table=1, in_port=veth2,tcp,ct_state=+trk+est actions=veth1"

====================

Link: https://lore.kernel.org/r/cover.1667766782.git.lucien.xin@gmail.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 47f3ecf4 a21b06e7
......@@ -115,6 +115,11 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags);
int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, u16 proto);
int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
u8 proto, bool nat, struct nf_conntrack_helper **hp);
void nf_ct_helper_destroy(struct nf_conn *ct);
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
......
......@@ -10,6 +10,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
struct tcf_ct_params {
struct nf_conntrack_helper *helper;
struct nf_conn *tmpl;
u16 zone;
......
......@@ -22,6 +22,9 @@ enum {
TCA_CT_NAT_PORT_MIN, /* be16 */
TCA_CT_NAT_PORT_MAX, /* be16 */
TCA_CT_PAD,
TCA_CT_HELPER_NAME, /* string */
TCA_CT_HELPER_FAMILY, /* u8 */
TCA_CT_HELPER_PROTO, /* u8 */
__TCA_CT_MAX
};
......
......@@ -26,7 +26,9 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_log.h>
#include <net/ip.h>
static DEFINE_MUTEX(nf_ct_helper_mutex);
struct hlist_head *nf_ct_helper_hash __read_mostly;
......@@ -240,6 +242,104 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
/* 'skb' should already be pulled to nh_ofs. */
int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, u16 proto)
{
const struct nf_conntrack_helper *helper;
const struct nf_conn_help *help;
unsigned int protoff;
int err;
if (ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
if (helper->tuple.src.l3num != NFPROTO_UNSPEC &&
helper->tuple.src.l3num != proto)
return NF_ACCEPT;
switch (proto) {
case NFPROTO_IPV4:
protoff = ip_hdrlen(skb);
proto = ip_hdr(skb)->protocol;
break;
case NFPROTO_IPV6: {
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
__be16 frag_off;
int ofs;
ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
protoff = ofs;
proto = nexthdr;
break;
}
default:
WARN_ONCE(1, "helper invoked on non-IP family!");
return NF_DROP;
}
if (helper->tuple.dst.protonum != proto)
return NF_ACCEPT;
err = helper->help(skb, protoff, ct, ctinfo);
if (err != NF_ACCEPT)
return err;
/* Adjust seqs after helper. This is needed due to some helpers (e.g.,
* FTP with NAT) adusting the TCP payload size when mangling IP
* addresses and/or port numbers in the text-based control connection.
*/
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
return NF_DROP;
return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_ct_helper);
int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
u8 proto, bool nat, struct nf_conntrack_helper **hp)
{
struct nf_conntrack_helper *helper;
struct nf_conn_help *help;
int ret = 0;
helper = nf_conntrack_helper_try_module_get(name, family, proto);
if (!helper)
return -EINVAL;
help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
if (!help) {
nf_conntrack_helper_put(helper);
return -ENOMEM;
}
#if IS_ENABLED(CONFIG_NF_NAT)
if (nat) {
ret = nf_nat_helper_try_module_get(name, family, proto);
if (ret) {
nf_conntrack_helper_put(helper);
return ret;
}
}
#endif
rcu_assign_pointer(help->helper, helper);
*hp = helper;
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_add_helper);
/* appropriate ct lock protecting must be taken by caller */
static int unhelp(struct nf_conn *ct, void *me)
{
......
......@@ -434,65 +434,6 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
/* 'skb' should already be pulled to nh_ofs. */
static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
{
const struct nf_conntrack_helper *helper;
const struct nf_conn_help *help;
enum ip_conntrack_info ctinfo;
unsigned int protoff;
struct nf_conn *ct;
int err;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
switch (proto) {
case NFPROTO_IPV4:
protoff = ip_hdrlen(skb);
break;
case NFPROTO_IPV6: {
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
__be16 frag_off;
int ofs;
ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
protoff = ofs;
break;
}
default:
WARN_ONCE(1, "helper invoked on non-IP family!");
return NF_DROP;
}
err = helper->help(skb, protoff, ct, ctinfo);
if (err != NF_ACCEPT)
return err;
/* Adjust seqs after helper. This is needed due to some helpers (e.g.,
* FTP with NAT) adusting the TCP payload size when mangling IP
* addresses and/or port numbers in the text-based control connection.
*/
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
return NF_DROP;
return NF_ACCEPT;
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
......@@ -1038,7 +979,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
*/
if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
info->commit) &&
ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
return -EINVAL;
}
......@@ -1350,43 +1291,6 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
const struct sw_flow_key *key, bool log)
{
struct nf_conntrack_helper *helper;
struct nf_conn_help *help;
int ret = 0;
helper = nf_conntrack_helper_try_module_get(name, info->family,
key->ip.proto);
if (!helper) {
OVS_NLERR(log, "Unknown helper \"%s\"", name);
return -EINVAL;
}
help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
if (!help) {
nf_conntrack_helper_put(helper);
return -ENOMEM;
}
#if IS_ENABLED(CONFIG_NF_NAT)
if (info->nat) {
ret = nf_nat_helper_try_module_get(name, info->family,
key->ip.proto);
if (ret) {
nf_conntrack_helper_put(helper);
OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d",
name, ret);
return ret;
}
}
#endif
rcu_assign_pointer(help->helper, helper);
info->helper = helper;
return ret;
}
#if IS_ENABLED(CONFIG_NF_NAT)
static int parse_nat(const struct nlattr *attr,
struct ovs_conntrack_info *info, bool log)
......@@ -1720,10 +1624,13 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
}
if (helper) {
err = ovs_ct_add_helper(&ct_info, helper, key, log);
if (err)
err = nf_ct_add_helper(ct_info.ct, helper, ct_info.family,
key->ip.proto, ct_info.nat, &ct_info.helper);
if (err) {
OVS_NLERR(log, "Failed to add %s helper %d", helper, err);
goto err_free_ct;
}
}
err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
sizeof(ct_info), log);
......
......@@ -33,6 +33,7 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <uapi/linux/netfilter/nf_nat.h>
static struct workqueue_struct *act_ct_wq;
......@@ -345,11 +346,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
module_put(THIS_MODULE);
}
static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft)
{
struct tcf_ct_flow_table *ct_ft = params->ct_ft;
if (refcount_dec_and_test(&params->ct_ft->ref)) {
if (refcount_dec_and_test(&ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
......@@ -657,7 +656,7 @@ struct tc_ct_action_net {
/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
u16 zone_id, bool force)
struct tcf_ct_params *p)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
......@@ -667,11 +666,19 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
return false;
if (!net_eq(net, read_pnet(&ct->ct_net)))
goto drop_ct;
if (nf_ct_zone(ct)->id != zone_id)
if (nf_ct_zone(ct)->id != p->zone)
goto drop_ct;
if (p->helper) {
struct nf_conn_help *help;
help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
if (help && rcu_access_pointer(help->helper) != p->helper)
goto drop_ct;
}
/* Force conntrack entry direction. */
if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if ((p->ct_action & TCA_CT_ACT_FORCE) &&
CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
......@@ -832,18 +839,30 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
}
static void tcf_ct_params_free(struct rcu_head *head)
static void tcf_ct_params_free(struct tcf_ct_params *params)
{
struct tcf_ct_params *params = container_of(head,
struct tcf_ct_params, rcu);
tcf_ct_flow_table_put(params);
if (params->helper) {
#if IS_ENABLED(CONFIG_NF_NAT)
if (params->ct_action & TCA_CT_ACT_NAT)
nf_nat_helper_put(params->helper);
#endif
nf_conntrack_helper_put(params->helper);
}
if (params->ct_ft)
tcf_ct_flow_table_put(params->ct_ft);
if (params->tmpl)
nf_ct_put(params->tmpl);
kfree(params);
}
static void tcf_ct_params_free_rcu(struct rcu_head *head)
{
struct tcf_ct_params *params;
params = container_of(head, struct tcf_ct_params, rcu);
tcf_ct_params_free(params);
}
#if IS_ENABLED(CONFIG_NF_NAT)
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
......@@ -1023,13 +1042,14 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct net *net = dev_net(skb->dev);
bool cached, commit, clear, force;
enum ip_conntrack_info ctinfo;
struct tcf_ct *c = to_ct(a);
struct nf_conn *tmpl = NULL;
struct nf_hook_state state;
bool cached, commit, clear;
int nh_ofs, err, retval;
struct tcf_ct_params *p;
bool add_helper = false;
bool skip_add = false;
bool defrag = false;
struct nf_conn *ct;
......@@ -1040,7 +1060,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
retval = READ_ONCE(c->tcf_action);
commit = p->ct_action & TCA_CT_ACT_COMMIT;
clear = p->ct_action & TCA_CT_ACT_CLEAR;
force = p->ct_action & TCA_CT_ACT_FORCE;
tmpl = p->tmpl;
tcf_lastuse_update(&c->tcf_tm);
......@@ -1083,7 +1102,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
* actually run the packet through conntrack twice unless it's for a
* different zone.
*/
cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
cached = tcf_ct_skb_nfct_cached(net, skb, p);
if (!cached) {
if (tcf_ct_flow_table_lookup(p, skb, family)) {
skip_add = true;
......@@ -1116,6 +1135,22 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (err != NF_ACCEPT)
goto drop;
if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) {
err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC);
if (err)
goto drop;
add_helper = true;
if (p->ct_action & TCA_CT_ACT_NAT && !nfct_seqadj(ct)) {
if (!nfct_seqadj_ext_add(ct))
goto drop;
}
}
if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) {
if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT)
goto drop;
}
if (commit) {
tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
......@@ -1164,6 +1199,9 @@ static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
[TCA_CT_NAT_IPV6_MAX] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
[TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
[TCA_CT_HELPER_NAME] = { .type = NLA_STRING, .len = NF_CT_HELPER_NAME_LEN },
[TCA_CT_HELPER_FAMILY] = { .type = NLA_U8 },
[TCA_CT_HELPER_PROTO] = { .type = NLA_U8 },
};
static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
......@@ -1253,8 +1291,9 @@ static int tcf_ct_fill_params(struct net *net,
{
struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id);
struct nf_conntrack_zone zone;
int err, family, proto, len;
struct nf_conn *tmpl;
int err;
char *name;
p->zone = NF_CT_DEFAULT_ZONE_ID;
......@@ -1315,10 +1354,31 @@ static int tcf_ct_fill_params(struct net *net,
NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
return -ENOMEM;
}
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
p->tmpl = tmpl;
if (tb[TCA_CT_HELPER_NAME]) {
name = nla_data(tb[TCA_CT_HELPER_NAME]);
len = nla_len(tb[TCA_CT_HELPER_NAME]);
if (len > 16 || name[len - 1] != '\0') {
NL_SET_ERR_MSG_MOD(extack, "Failed to parse helper name.");
err = -EINVAL;
goto err;
}
family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET;
proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP;
err = nf_ct_add_helper(tmpl, name, family, proto,
p->ct_action & TCA_CT_ACT_NAT, &p->helper);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to add helper");
goto err;
}
}
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);
p->tmpl = NULL;
return err;
}
static int tcf_ct_init(struct net *net, struct nlattr *nla,
......@@ -1390,7 +1450,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
err = tcf_ct_flow_table_get(net, params);
if (err)
goto cleanup_params;
goto cleanup;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
......@@ -1401,17 +1461,15 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
call_rcu(&params->rcu, tcf_ct_params_free);
call_rcu(&params->rcu, tcf_ct_params_free_rcu);
return res;
cleanup_params:
if (params->tmpl)
nf_ct_put(params->tmpl);
cleanup:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
kfree(params);
if (params)
tcf_ct_params_free(params);
tcf_idr_release(*a, bind);
return err;
}
......@@ -1423,7 +1481,7 @@ static void tcf_ct_cleanup(struct tc_action *a)
params = rcu_dereference_protected(c->params, 1);
if (params)
call_rcu(&params->rcu, tcf_ct_params_free);
call_rcu(&params->rcu, tcf_ct_params_free_rcu);
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
......@@ -1489,6 +1547,19 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
return 0;
}
static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper)
{
if (!helper)
return 0;
if (nla_put_string(skb, TCA_CT_HELPER_NAME, helper->name) ||
nla_put_u8(skb, TCA_CT_HELPER_FAMILY, helper->tuple.src.l3num) ||
nla_put_u8(skb, TCA_CT_HELPER_PROTO, helper->tuple.dst.protonum))
return -1;
return 0;
}
static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
......@@ -1541,6 +1612,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
if (tcf_ct_dump_nat(skb, p))
goto nla_put_failure;
if (tcf_ct_dump_helper(skb, p->helper))
goto nla_put_failure;
skip_dump:
if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment