Commit 7e5e7d80 authored by Edward Cree, committed by Jakub Kicinski

sfc: neighbour lookup for TC encap action offload

For each neighbour we're interested in, create a struct efx_neigh_binder
 object which has a list of all the encap_actions using it.  When we
 receive a neighbour update (through the netevent notifier), find the
 corresponding efx_neigh_binder and update all its users.
Since the actual generation of encap headers is still only a stub, the
 resulting rules still get left on fallback actions.
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent f1363154
......@@ -24,6 +24,7 @@
#include "rx_common.h"
#include "ef100_sriov.h"
#include "tc_bindings.h"
#include "tc_encap_actions.h"
#include "efx_devlink.h"
static void ef100_update_name(struct efx_nic *efx)
......@@ -300,14 +301,38 @@ int ef100_netdev_event(struct notifier_block *this,
{
struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier);
struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
struct ef100_nic_data *nic_data = efx->nic_data;
int err;
if (efx->net_dev == net_dev &&
(event == NETDEV_CHANGENAME || event == NETDEV_REGISTER))
ef100_update_name(efx);
if (!nic_data->grp_mae)
return NOTIFY_DONE;
err = efx_tc_netdev_event(efx, event, net_dev);
if (err & NOTIFY_STOP_MASK)
return err;
return NOTIFY_DONE;
}
/* Netevent notifier callback for an EF100 function.
 *
 * @this:  the notifier block embedded in struct efx_nic as
 *         @netevent_notifier; used to recover the owning efx_nic
 * @event: netevent code, passed through unchanged
 * @ptr:   event payload, passed through unchanged
 *
 * Events are ignored unless nic_data->grp_mae is set; otherwise they are
 * handed to efx_tc_netevent_event().  The handler's return value is only
 * propagated when it carries NOTIFY_STOP_MASK; in every other case the
 * chain is told NOTIFY_DONE.
 */
static int ef100_netevent_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier);
	struct ef100_nic_data *nic_data = efx->nic_data;
	int err;

	if (!nic_data->grp_mae)
		return NOTIFY_DONE;

	err = efx_tc_netevent_event(efx, event, ptr);
	if (err & NOTIFY_STOP_MASK)
		return err;

	return NOTIFY_DONE;
}
static int ef100_register_netdev(struct efx_nic *efx)
{
struct net_device *net_dev = efx->net_dev;
......@@ -367,6 +392,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data)
rtnl_unlock();
unregister_netdevice_notifier(&efx->netdev_notifier);
unregister_netevent_notifier(&efx->netevent_notifier);
#if defined(CONFIG_SFC_SRIOV)
if (!efx->type->is_vf)
efx_ef100_pci_sriov_disable(efx, true);
......@@ -487,6 +513,14 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data)
goto fail;
}
efx->netevent_notifier.notifier_call = ef100_netevent_event;
rc = register_netevent_notifier(&efx->netevent_notifier);
if (rc) {
netif_err(efx, probe, efx->net_dev,
"Failed to register netevent notifier, rc=%d\n", rc);
goto fail;
}
efx_probe_devlink_unlock(efx);
return rc;
fail:
......
......@@ -27,6 +27,7 @@
#include <linux/mtd/mtd.h>
#include <net/busy_poll.h>
#include <net/xdp.h>
#include <net/netevent.h>
#include "enum.h"
#include "bitfield.h"
......@@ -996,6 +997,7 @@ struct efx_mae;
* @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
* xdp_rxq_info structures?
* @netdev_notifier: Netdevice notifier.
* @netevent_notifier: Netevent notifier (for neighbour updates).
* @tc: state for TC offload (EF100).
* @devlink: reference to devlink structure owned by this device
* @dl_port: devlink port associated with the PF
......@@ -1183,6 +1185,7 @@ struct efx_nic {
bool xdp_rxq_info_failed;
struct notifier_block netdev_notifier;
struct notifier_block netevent_notifier;
struct efx_tc_state *tc;
struct devlink *devlink;
......
......@@ -34,7 +34,7 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
* May return NULL for the PF (us), or an error pointer for a device that
* isn't supported as a TC offload endpoint
*/
static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
struct net_device *dev)
{
struct efx_rep *efv;
......@@ -71,7 +71,7 @@ static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv
}
/* Convert a driver-internal vport ID into an external device (wire or VF) */
static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
u32 mport;
......@@ -112,8 +112,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx,
}
if (act->count)
efx_tc_flower_put_counter_index(efx, act->count);
if (act->encap_md)
if (act->encap_md) {
list_del(&act->encap_user);
efx_tc_flower_release_encap_md(efx, act->encap_md);
}
kfree(act);
}
......@@ -1115,6 +1117,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
goto release;
}
act->encap_md = encap;
list_add_tail(&act->encap_user, &encap->users);
act->dest_mport = encap->dest_mport;
act->deliver = 1;
rc = efx_mae_alloc_action_set(efx, act);
......@@ -1123,6 +1126,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
goto release;
}
list_add_tail(&act->list, &rule->acts.list);
act->user = &rule->acts;
act = NULL;
if (fa->id == FLOW_ACTION_REDIRECT)
break; /* end of the line */
......
......@@ -36,6 +36,8 @@ struct efx_tc_action_set {
__be16 vlan_proto[2]; /* Ethertypes for vlan_push */
struct efx_tc_counter_index *count;
struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */
struct list_head encap_user; /* entry on encap_md->users list */
struct efx_tc_action_set_list *user; /* Only populated if encap_md */
u32 dest_mport;
u32 fw_id; /* index of this entry in firmware actions table */
struct list_head list;
......@@ -151,6 +153,7 @@ enum efx_tc_rule_prios {
* @encap_ht: Hashtable of TC encap actions
* @encap_match_ht: Hashtable of TC encap matches
* @match_action_ht: Hashtable of TC match-action rules
* @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder)
* @reps_mport_id: MAE port allocated for representor RX
* @reps_filter_uc: VNIC filter for representor unicast RX (promisc)
* @reps_filter_mc: VNIC filter for representor multicast RX (allmulti)
......@@ -181,6 +184,7 @@ struct efx_tc_state {
struct rhashtable encap_ht;
struct rhashtable encap_match_ht;
struct rhashtable match_action_ht;
struct rhashtable neigh_ht;
u32 reps_mport_id, reps_mport_vport_id;
s32 reps_filter_uc, reps_filter_mc;
bool flush_counters;
......@@ -201,6 +205,9 @@ struct efx_tc_state {
struct efx_rep;
enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev);
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
struct net_device *dev);
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv);
int efx_tc_configure_default_rule_rep(struct efx_rep *efv);
void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
struct efx_tc_flow_rule *rule);
......
......@@ -10,6 +10,7 @@
#include "tc_bindings.h"
#include "tc.h"
#include "tc_encap_actions.h"
struct efx_tc_block_binding {
struct list_head list;
......@@ -226,3 +227,15 @@ int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type,
return -EOPNOTSUPP;
}
/* Handle a netdevice notifier event on behalf of TC offload.
 *
 * VFs take no action and report NOTIFY_DONE.  For any other function,
 * NETDEV_UNREGISTER of @net_dev is passed to efx_tc_unregister_egdev();
 * every event (handled or not) then yields NOTIFY_OK.
 */
int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
			struct net_device *net_dev)
{
	if (efx->type->is_vf)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UNREGISTER:
		efx_tc_unregister_egdev(efx, net_dev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}
......@@ -26,4 +26,6 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch,
void *cb_priv, enum tc_setup_type type,
void *type_data, void *data,
void (*cleanup)(struct flow_block_cb *block_cb));
int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
struct net_device *net_dev);
#endif /* EFX_TC_BINDINGS_H */
......@@ -13,6 +13,14 @@
#include "mae.h"
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/netevent.h>
#include <net/arp.h>
/* Hashtable parameters for efx->tc->neigh_ht.
 * The key starts at offset 0 of struct efx_neigh_binder and runs up to
 * (but not including) the @ha member, so everything declared before @ha
 * in that struct forms the lookup key.
 */
static const struct rhashtable_params efx_neigh_ht_params = {
.key_len = offsetof(struct efx_neigh_binder, ha),
.key_offset = 0,
.head_offset = offsetof(struct efx_neigh_binder, linkage),
};
static const struct rhashtable_params efx_tc_encap_ht_params = {
.key_len = offsetofend(struct efx_tc_encap_action, key),
......@@ -28,9 +36,32 @@ static void efx_tc_encap_free(void *ptr, void *__unused)
kfree(enc);
}
/* Per-element destructor passed to rhashtable_free_and_destroy() for the
 * neighbour table.  An entry reaching this point is expected to have no
 * references and no users left; either condition failing triggers a
 * warning.  The netns and egress-device references are then dropped and
 * the entry is freed.
 */
static void efx_neigh_free(void *ptr, void *__unused)
{
	struct efx_neigh_binder *binder = ptr;

	WARN_ON(refcount_read(&binder->ref));
	WARN_ON(!list_empty(&binder->users));

	put_net_track(binder->net, &binder->ns_tracker);
	netdev_put(binder->egdev, &binder->dev_tracker);
	kfree(binder);
}
/* Initialise the hashtables used by TC encap action offload.
 *
 * Sets up efx->tc->neigh_ht first and efx->tc->encap_ht second.  If the
 * second initialisation fails the first table is destroyed again, so on
 * error nothing is left allocated.
 *
 * Returns 0 on success or the negative error from rhashtable_init().
 */
int efx_tc_init_encap_actions(struct efx_nic *efx)
{
	int rc;

	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
	if (rc < 0)
		goto fail_neigh_ht;
	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
	if (rc < 0)
		goto fail_encap_ht;
	return 0;

fail_encap_ht:
	rhashtable_destroy(&efx->tc->neigh_ht);
fail_neigh_ht:
	return rc;
}
/* Only call this in init failure teardown.
......@@ -39,11 +70,337 @@ int efx_tc_init_encap_actions(struct efx_nic *efx)
void efx_tc_destroy_encap_actions(struct efx_nic *efx)
{
rhashtable_destroy(&efx->tc->encap_ht);
rhashtable_destroy(&efx->tc->neigh_ht);
}
void efx_tc_fini_encap_actions(struct efx_nic *efx)
{
rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
}
static void efx_neigh_update(struct work_struct *work);
/* Attach @encap to the neighbour binder for its tunnel destination.
 *
 * @efx:    NIC whose efx->tc->neigh_ht holds the binders
 * @encap:  encap action to bind; its type and key select the destination
 * @net:    network namespace in which to perform route/neighbour lookups
 * @extack: netlink extended ack for error reporting
 *
 * A binder keyed on (@net, destination address) is looked up in the
 * neighbour table.  If one already exists a reference is taken on it;
 * otherwise a new one is inserted and populated from a route lookup and
 * neighbour lookup, holding a reference on the egress device.  If the
 * neighbour is not currently valid, resolution is kicked off with
 * neigh_event_send().
 *
 * On success @encap->neigh points at the binder and @encap is linked onto
 * the binder's users list.
 *
 * Returns 0 on success, or a negative error: -EOPNOTSUPP for an
 * unsupported encap type (or an IPv6 encap on a kernel built without
 * IPv6), -ENOMEM on allocation failure, -EAGAIN if an existing binder was
 * found but its refcount had already dropped to zero, the route lookup
 * error, or -ENETUNREACH if no neighbour entry could be obtained.
 */
static int efx_bind_neigh(struct efx_nic *efx,
			  struct efx_tc_encap_action *encap, struct net *net,
			  struct netlink_ext_ack *extack)
{
	struct efx_neigh_binder *neigh, *old;
	struct flowi6 flow6 = {};
	struct flowi4 flow4 = {};
	int rc;

	/* GCC stupidly thinks that only values explicitly listed in the enum
	 * definition can _possibly_ be sensible case values, so without this
	 * cast it complains about the IPv6 versions.
	 */
	switch ((int)encap->type) {
	case EFX_ENCAP_TYPE_VXLAN:
	case EFX_ENCAP_TYPE_GENEVE:
		flow4.flowi4_proto = IPPROTO_UDP;
		flow4.fl4_dport = encap->key.tp_dst;
		flow4.flowi4_tos = encap->key.tos;
		flow4.daddr = encap->key.u.ipv4.dst;
		flow4.saddr = encap->key.u.ipv4.src;
		break;
	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
		flow6.flowi6_proto = IPPROTO_UDP;
		flow6.fl6_dport = encap->key.tp_dst;
		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
						    encap->key.label);
		flow6.daddr = encap->key.u.ipv6.dst;
		flow6.saddr = encap->key.u.ipv6.src;
		break;
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
				       (int)encap->type);
		return -EOPNOTSUPP;
	}

	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
	if (!neigh)
		return -ENOMEM;
	neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
	/* Only one of these is non-zero; the flow for the other address
	 * family was left zero-initialised above.
	 */
	neigh->dst_ip = flow4.daddr;
	neigh->dst_ip6 = flow6.daddr;

	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
						&neigh->linkage,
						efx_neigh_ht_params);
	if (old) {
		/* don't need our new entry */
		put_net_track(neigh->net, &neigh->ns_tracker);
		kfree(neigh);
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found, ref taken */
		neigh = old;
	} else {
		/* New entry.  We need to initiate a lookup */
		struct neighbour *n;
		struct rtable *rt;

		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
			struct dst_entry *dst;

			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
							      NULL);
			rc = PTR_ERR_OR_ZERO(dst);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
				goto out_free;
			}
			neigh->egdev = dst->dev;
			netdev_hold(neigh->egdev, &neigh->dev_tracker,
				    GFP_KERNEL_ACCOUNT);
			neigh->ttl = ip6_dst_hoplimit(dst);
			n = dst_neigh_lookup(dst, &flow6.daddr);
			dst_release(dst);
#else
			/* We shouldn't ever get here, because if IPv6 isn't
			 * enabled how did someone create an IPv6 tunnel_key?
			 */
			rc = -EOPNOTSUPP;
			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
			/* Bail out now: @n was never assigned and no egdev
			 * reference is held, so the code below must not run.
			 */
			goto out_free;
#endif
		} else {
			rt = ip_route_output_key(net, &flow4);
			if (IS_ERR_OR_NULL(rt)) {
				rc = PTR_ERR_OR_ZERO(rt);
				if (!rc)
					rc = -EIO;
				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
				goto out_free;
			}
			neigh->egdev = rt->dst.dev;
			netdev_hold(neigh->egdev, &neigh->dev_tracker,
				    GFP_KERNEL_ACCOUNT);
			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
			ip_rt_put(rt);
		}
		if (!n) {
			rc = -ENETUNREACH;
			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
			netdev_put(neigh->egdev, &neigh->dev_tracker);
			goto out_free;
		}
		refcount_set(&neigh->ref, 1);
		INIT_LIST_HEAD(&neigh->users);
		/* Snapshot address and validity under the neighbour's lock */
		read_lock_bh(&n->lock);
		ether_addr_copy(neigh->ha, n->ha);
		neigh->n_valid = n->nud_state & NUD_VALID;
		read_unlock_bh(&n->lock);
		rwlock_init(&neigh->lock);
		INIT_WORK(&neigh->work, efx_neigh_update);
		neigh->efx = efx;
		neigh->used = jiffies;
		if (!neigh->n_valid)
			/* Prod ARP to find us a neighbour */
			neigh_event_send(n, NULL);
		neigh_release(n);
	}
	/* Add us to this neigh */
	encap->neigh = neigh;
	list_add_tail(&encap->list, &neigh->users);
	return 0;

out_free:
	/* cleanup common to several error paths */
	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
			       efx_neigh_ht_params);
	synchronize_rcu();
	put_net_track(net, &neigh->ns_tracker);
	kfree(neigh);
	return rc;
}
/* Unhash and destroy a neighbour binder.
 * The entry is removed from the neighbour table, then an RCU grace period
 * is waited out before the device and netns references are dropped and
 * the memory is freed.
 */
static void efx_free_neigh(struct efx_neigh_binder *neigh)
{
	struct rhashtable *table = &neigh->efx->tc->neigh_ht;

	rhashtable_remove_fast(table, &neigh->linkage, efx_neigh_ht_params);
	synchronize_rcu();

	netdev_put(neigh->egdev, &neigh->dev_tracker);
	put_net_track(neigh->net, &neigh->ns_tracker);
	kfree(neigh);
}
/* Detach @encap from its neighbour binder, if it has one.
 * The encap is unlinked from the binder's users list and its reference is
 * dropped; the binder is freed when that was the last reference.
 */
static void efx_release_neigh(struct efx_nic *efx,
			      struct efx_tc_encap_action *encap)
{
	struct efx_neigh_binder *binder = encap->neigh;

	if (binder) {
		list_del(&encap->list);
		encap->neigh = NULL;
		/* Only the final reference holder frees the binder */
		if (refcount_dec_and_test(&binder->ref))
			efx_free_neigh(binder);
	}
}
/* Build the encapsulation header for @encap.
 * Currently a stub: the header is zeroed, its length is set to ETH_HLEN
 * and the encap is marked as not valid.
 */
static void efx_gen_encap_header(struct efx_tc_encap_action *encap)
{
	/* stub for now */
	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
	encap->encap_hdr_len = ETH_HLEN;
	encap->n_valid = false;
}
/* Refresh an encap action after its neighbour state may have changed.
 *
 * Three phases:
 *  1. If the encap was valid, every rule using it is switched to its
 *     fallback action set (or the PF-delivery default when the rule has
 *     none), so that nothing uses the encap while it is rewritten.
 *  2. The header is regenerated (under the binder's read lock), or reset
 *     to an invalid all-zero header when no binder is attached, and the
 *     result is pushed to hardware.
 *  3. If the encap is now valid, each user rule that passes
 *     efx_tc_check_ready() is switched back to its real action set.
 *
 * Failures are logged; a failure to update the encap header aborts
 * before phase 3.
 */
static void efx_tc_update_encap(struct efx_nic *efx,
				struct efx_tc_encap_action *encap)
{
	struct efx_tc_action_set_list *actions, *fb;
	struct efx_tc_action_set *user;
	struct efx_tc_flow_rule *rule;
	int rc;

	if (encap->n_valid) {
		/* Make sure no rules are using this encap while we change it */
		list_for_each_entry(user, &encap->users, encap_user) {
			actions = user->user;
			if (WARN_ON(!actions)) /* can't happen */
				continue;
			rule = container_of(actions, struct efx_tc_flow_rule, acts);
			/* fallback fallback: deliver to PF */
			fb = rule->fallback ? rule->fallback : &efx->tc->facts.pf;
			rc = efx_mae_update_rule(efx, fb->fw_id, rule->fw_id);
			if (!rc)
				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
					  rule->fw_id);
			else
				netif_err(efx, drv, efx->net_dev,
					  "Failed to update (f) rule %08x rc %d\n",
					  rule->fw_id, rc);
		}
	}

	if (!encap->neigh) {
		/* No neighbour: fall back to an invalid, zeroed header */
		encap->n_valid = false;
		memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
		encap->encap_hdr_len = ETH_HLEN;
	} else {
		read_lock_bh(&encap->neigh->lock);
		efx_gen_encap_header(encap);
		read_unlock_bh(&encap->neigh->lock);
	}

	rc = efx_mae_update_encap_md(efx, encap);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "Failed to update encap hdr %08x rc %d\n",
			  encap->fw_id, rc);
		return;
	}
	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
		  encap->fw_id);

	if (!encap->n_valid)
		return;

	/* Update rule users: use the action if they are now ready */
	list_for_each_entry(user, &encap->users, encap_user) {
		actions = user->user;
		if (WARN_ON(!actions)) /* can't happen */
			continue;
		rule = container_of(actions, struct efx_tc_flow_rule, acts);
		if (!efx_tc_check_ready(efx, rule))
			continue;
		rc = efx_mae_update_rule(efx, actions->fw_id, rule->fw_id);
		if (!rc)
			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
				  rule->fw_id);
		else
			netif_err(efx, drv, efx->net_dev,
				  "Failed to update rule %08x rc %d\n",
				  rule->fw_id, rc);
	}
}
/* Work item run after a neighbour's state changed.
 * With the TC mutex held, every encap action using the binder is updated,
 * then the reference taken when the work was scheduled is dropped (which
 * may free the binder).
 */
static void efx_neigh_update(struct work_struct *work)
{
	struct efx_neigh_binder *binder = container_of(work, struct efx_neigh_binder, work);
	struct efx_nic *efx = binder->efx;
	struct efx_tc_encap_action *user;

	mutex_lock(&efx->tc->mutex);

	list_for_each_entry(user, &binder->users, list)
		efx_tc_update_encap(efx, user);

	/* release ref taken in efx_neigh_event() */
	if (refcount_dec_and_test(&binder->ref))
		efx_free_neigh(binder);

	mutex_unlock(&efx->tc->mutex);
}
static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
{
struct efx_neigh_binder keys = {NULL}, *neigh;
bool n_valid, ipv6 = false;
char ha[ETH_ALEN];
size_t keysize;
if (WARN_ON(!efx->tc))
return NOTIFY_DONE;
if (n->tbl == &arp_tbl) {
keysize = sizeof(keys.dst_ip);
#if IS_ENABLED(CONFIG_IPV6)
} else if (n->tbl == ipv6_stub->nd_tbl) {
ipv6 = true;
keysize = sizeof(keys.dst_ip6);
#endif
} else {
return NOTIFY_DONE;
}
if (!n->parms) {
netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
return NOTIFY_DONE;
}
keys.net = read_pnet(&n->parms->net);
if (n->tbl->key_len != keysize) {
netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
n->tbl->key_len);
return NOTIFY_DONE;
}
read_lock_bh(&n->lock); /* Get a consistent view */
memcpy(ha, n->ha, ETH_ALEN);
n_valid = (n->nud_state & NUD_VALID) && !n->dead;
read_unlock_bh(&n->lock);
if (ipv6)
memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
else
memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
rcu_read_lock();
neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
efx_neigh_ht_params);
if (!neigh || neigh->dying)
/* We're not interested in this neighbour */
goto done;
write_lock_bh(&neigh->lock);
if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
write_unlock_bh(&neigh->lock);
/* Nothing has changed; no work to do */
goto done;
}
neigh->n_valid = n_valid;
memcpy(neigh->ha, ha, ETH_ALEN);
write_unlock_bh(&neigh->lock);
if (refcount_inc_not_zero(&neigh->ref)) {
rcu_read_unlock();
if (!schedule_work(&neigh->work))
/* failed to schedule, release the ref we just took */
if (refcount_dec_and_test(&neigh->ref))
efx_free_neigh(neigh);
} else {
done:
rcu_read_unlock();
}
return NOTIFY_DONE;
}
bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
......@@ -54,7 +411,7 @@ bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
* neighbour info for the outer Ethernet header.
*/
list_for_each_entry(act, &rule->acts.list, list)
if (act->encap_md) /* neigh bindings not implemented yet */
if (act->encap_md && !act->encap_md->n_valid)
return false;
return true;
}
......@@ -65,6 +422,7 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
{
enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
struct efx_tc_encap_action *encap, *old;
struct efx_rep *to_efv;
s64 rc;
if (type == EFX_ENCAP_TYPE_NONE) {
......@@ -98,6 +456,7 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
return ERR_PTR(-ENOMEM);
encap->type = type;
encap->key = info->key;
INIT_LIST_HEAD(&encap->users);
old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
&encap->linkage,
efx_tc_encap_ht_params);
......@@ -110,9 +469,42 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
return old;
}
rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
if (rc < 0)
goto out_remove;
to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
if (IS_ERR(to_efv)) {
/* neigh->egdev isn't ours */
NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
rc = PTR_ERR(to_efv);
goto out_release;
}
rc = efx_tc_flower_external_mport(efx, to_efv);
if (rc < 0) {
NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
goto out_release;
}
encap->dest_mport = rc;
read_lock_bh(&encap->neigh->lock);
efx_gen_encap_header(encap);
read_unlock_bh(&encap->neigh->lock);
rc = efx_mae_allocate_encap_md(efx, encap);
if (rc < 0) {
NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
goto out_release;
}
/* ref and return */
refcount_set(&encap->ref, 1);
return encap;
out_release:
efx_release_neigh(efx, encap);
out_remove:
rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
efx_tc_encap_ht_params);
kfree(encap);
return ERR_PTR(rc);
}
void efx_tc_flower_release_encap_md(struct efx_nic *efx,
......@@ -120,7 +512,59 @@ void efx_tc_flower_release_encap_md(struct efx_nic *efx,
{
if (!refcount_dec_and_test(&encap->ref))
return; /* still in use */
efx_release_neigh(efx, encap);
rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
efx_tc_encap_ht_params);
efx_mae_free_encap_md(efx, encap);
kfree(encap);
}
/* Detach every encap action from @neigh and push the resulting
 * (neighbour-less) state of each one out via efx_tc_update_encap().
 */
static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
{
	struct efx_tc_encap_action *user, *tmp;

	/* _safe iteration: efx_release_neigh() unlinks the current entry */
	list_for_each_entry_safe(user, tmp, &neigh->users, list) {
		/* Should cause neigh usage count to fall to zero, freeing it */
		efx_release_neigh(efx, user);
		/* The encap has lost its neigh, so it's now unready */
		efx_tc_update_encap(efx, user);
	}
}
void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
{
struct efx_neigh_binder *neigh;
struct rhashtable_iter walk;
mutex_lock(&efx->tc->mutex);
rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
rhashtable_walk_start(&walk);
while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
if (IS_ERR(neigh))
continue;
if (neigh->egdev != net_dev)
continue;
neigh->dying = true;
rhashtable_walk_stop(&walk);
synchronize_rcu(); /* Make sure any updates see dying flag */
efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
rhashtable_walk_start(&walk);
}
rhashtable_walk_stop(&walk);
rhashtable_walk_exit(&walk);
mutex_unlock(&efx->tc->mutex);
}
/* Dispatch a netevent to TC offload.
 * Only NETEVENT_NEIGH_UPDATE on a non-VF function is handled (by
 * efx_neigh_event()); everything else yields NOTIFY_DONE.
 */
int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
			  void *ptr)
{
	if (efx->type->is_vf || event != NETEVENT_NEIGH_UPDATE)
		return NOTIFY_DONE;

	return efx_neigh_event(efx, ptr);
}
......@@ -15,6 +15,54 @@
#include <linux/refcount.h>
#include <net/tc_act/tc_tunnel_key.h>
/**
 * struct efx_neigh_binder - driver state for a neighbour entry
 * @net: the network namespace in which this neigh resides
 * @dst_ip: the IPv4 destination address resolved by this neigh
 * @dst_ip6: the IPv6 destination address resolved by this neigh
 * @ha: the hardware (Ethernet) address of the neighbour
 * @n_valid: true if the neighbour is in NUD_VALID state
 * @lock: protects @ha and @n_valid
 * @ttl: Time To Live associated with the route used
 * @dying: set when egdev is going away, to skip further updates
 * @egdev: egress device from the route lookup.  Holds a reference
 * @dev_tracker: reference tracker entry for @egdev
 * @ns_tracker: reference tracker entry for @net
 * @ref: counts encap actions referencing this entry
 * @used: jiffies of last time traffic hit any encap action using this.
 *	When counter reads update this, a new neighbour event is sent to
 *	indicate that the neighbour entry is still in use.
 * @users: list of &struct efx_tc_encap_action
 * @linkage: entry in efx->tc->neigh_ht (keys are @net, @dst_ip, @dst_ip6).
 * @work: processes neighbour state changes, updates the encap actions
 * @efx: owning NIC instance.
 *
 * Associates a neighbour entry with the encap actions that are
 * interested in it, allowing the latter to be updated when the
 * neighbour details change.
 * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes,
 * this distinguishes IPv4 from IPv6 entries.
 *
 * The hashtable key is the leading part of this struct, up to but not
 * including @ha, so the order of the first three members matters.
 */
struct efx_neigh_binder {
struct net *net;
__be32 dst_ip;
struct in6_addr dst_ip6;
char ha[ETH_ALEN];
bool n_valid;
rwlock_t lock;
u8 ttl;
bool dying;
struct net_device *egdev;
netdevice_tracker dev_tracker;
netns_tracker ns_tracker;
refcount_t ref;
unsigned long used;
struct list_head users;
struct rhash_head linkage;
struct work_struct work;
struct efx_nic *efx;
};
/* This limit is arbitrary; current hardware (SN1022) handles encap headers
* of up to 126 bytes, but that limit is not enshrined in the MCDI protocol.
*/
......@@ -24,7 +72,11 @@ struct efx_tc_encap_action {
struct ip_tunnel_key key; /* 52 bytes */
u32 dest_mport; /* is copied into struct efx_tc_action_set */
u8 encap_hdr_len;
bool n_valid;
u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR];
struct efx_neigh_binder *neigh;
struct list_head list; /* entry on neigh->users list */
struct list_head users; /* action sets using this encap_md */
struct rhash_head linkage; /* efx->tc_encap_ht */
refcount_t ref;
u32 fw_id; /* index of this entry in firmware encap table */
......@@ -44,4 +96,8 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
void efx_tc_flower_release_encap_md(struct efx_nic *efx,
struct efx_tc_encap_action *encap);
void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev);
int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
void *ptr);
#endif /* EFX_TC_ENCAP_ACTIONS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment