Commit 7cf497e5 authored by David S. Miller

Merge branch 'nexthop-group-stats'

Petr Machata says:

====================
Support for nexthop group statistics

ECMP is a fundamental component in L3 designs. However, it's fragile. Many
factors influence whether an ECMP group will operate as intended: hash
policy (i.e. the set of fields that contribute to ECMP hash calculation),
neighbor validity, hash seed (which might lead to polarization) or the type
of ECMP group used (hash-threshold or resilient).

At the same time, it is difficult to collect statistics that would help an
operator determine whether the group performs as desired.

The solution we present in this patchset is to add counters to nexthop
group entries. For SW-datapath deployments, this on its own allows
collection and evaluation of the relevant statistics. For HW-datapath
deployments, we further add a way to request that HW counters be installed
for a given group, in-kernel interfaces to collect the HW statistics, and
netlink interfaces to query them.

For example:

    # ip nexthop replace id 4000 group 4001/4002 hw_stats on

    # ip -s -d nexthop show id 4000
    id 4000 group 4001/4002 scope global proto unspec offload hw_stats on used on
      stats:
        id 4001 packets 5002 packets_hw 5000
        id 4002 packets 4999 packets_hw 4999
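
Note that in this output, the "packets" value reported for each entry is the
sum of the software-counted packets and the hardware-reported ones shown in
"packets_hw". To stop collecting HW counters for the group, the same replace
syntax should work in the other direction (this assumes the iproute2 hw_stats
keyword also accepts "off"):

    # ip nexthop replace id 4000 group 4001/4002 hw_stats off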

The point of the patchset is visibility of ECMP balance, and that is
influenced by packet headers, not their payload. Correspondingly, we only
include packet counters in the statistics, not byte counters.

We also decided to model HW statistics as a nexthop group attribute, not an
arbitrary nexthop one. The latter would count any traffic going through a
given nexthop, regardless of which ECMP group it is in, if any. The
reason is again that the point of the patchset is ECMP balance visibility,
not arbitrary inspection of how busy a particular nexthop is.
Implementation of individual-nexthop statistics is certainly possible, and
could well follow the general approach we are taking in this patchset.
For resilient groups, per-bucket statistics could be done in a similar
manner as well.

This patchset contains the core code. mlxsw support will be sent in a
follow-up patch set.

This patchset progresses as follows:

- Patches #1 and #2 add support for a new next-hop object attribute,
  NHA_OP_FLAGS. That is meant to carry various op-specific signaling, in
  particular whether SW- and HW-collected nexthop stats should be part of
  the get or dump response. The idea is to avoid wasting message space and
  time on collection of HW statistics when the values are not needed. (A
  sketch of such a request follows this list.)

- Patches #3 and #4 add SW-datapath stats and corresponding UAPI.

- Patches #5, #6 and #7 add support for HW-datapath stats and UAPI.
  Individual drivers still need to contribute the appropriate HW-specific
  support code. (A sketch of the driver-side hook follows this list.)
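
As an aside, the following is a minimal sketch of what a get request carrying
NHA_OP_FLAGS could look like from userspace. It is not part of the patchset;
it assumes libmnl and a uapi header that already carries the NHA_OP_FLAGS and
NHA_OP_FLAG_* definitions added here, and it omits error handling and reply
parsing.

    /* Build and send RTM_GETNEXTHOP for one nexthop, asking for both
     * SW- and HW-collected counters via NHA_OP_FLAGS.
     */
    #include <libmnl/libmnl.h>
    #include <linux/nexthop.h>
    #include <linux/rtnetlink.h>
    #include <stdint.h>
    #include <sys/socket.h>
    #include <time.h>

    static int request_nh_stats(struct mnl_socket *nl, uint32_t nh_id)
    {
            char buf[MNL_SOCKET_BUFFER_SIZE];
            struct nlmsghdr *nlh;
            struct nhmsg *nhm;

            nlh = mnl_nlmsg_put_header(buf);
            nlh->nlmsg_type = RTM_GETNEXTHOP;
            nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
            nlh->nlmsg_seq = time(NULL);

            /* Family-agnostic nexthop header. */
            nhm = mnl_nlmsg_put_extra_header(nlh, sizeof(*nhm));
            nhm->nh_family = AF_UNSPEC;

            mnl_attr_put_u32(nlh, NHA_ID, nh_id);
            mnl_attr_put_u32(nlh, NHA_OP_FLAGS,
                             NHA_OP_FLAG_DUMP_STATS |
                             NHA_OP_FLAG_DUMP_HW_STATS);

            if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
                    return -1;
            return 0;
    }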
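
On the driver side, the new NEXTHOP_EVENT_HW_STATS_REPORT_DELTA notification
asks the driver to report how many packets its HW counters have seen since
the last report. A hypothetical handler could look roughly as follows;
my_hw_read_and_clear_counter() stands in for whatever counter-read mechanism
a real driver (e.g. mlxsw in the follow-up set) provides:

    #include <linux/notifier.h>
    #include <net/nexthop.h>

    static int my_nh_event(struct notifier_block *nb, unsigned long event,
                           void *ptr)
    {
            struct nh_notifier_info *info = ptr;
            struct nh_notifier_grp_hw_stats_info *hw;
            unsigned int i;

            if (event != NEXTHOP_EVENT_HW_STATS_REPORT_DELTA)
                    return NOTIFY_DONE;

            hw = info->nh_grp_hw_stats;
            for (i = 0; i < hw->num_nh; i++) {
                    /* hw->stats[i].id is the id of the i-th group member. */
                    u64 delta = my_hw_read_and_clear_counter(hw->stats[i].id);

                    nh_grp_hw_stats_report_delta(hw, i, delta);
            }

            return NOTIFY_OK;
    }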

v4:
- Patch #2:
    - s/nla_get_bitfield32/nla_get_u32/ in __nh_valid_dump_req().

v3:
- Patch #3:
    - Convert to u64_stats_t
- Patch #4:
    - Give a symbolic name to the set of all valid dump flags
      for the NHA_OP_FLAGS attribute.
    - Convert to u64_stats_t
- Patch #6:
    - Use a named constant for the NHA_HW_STATS_ENABLE policy.

v2:
- Patch #2:
    - Change OP_FLAGS to u32, enforce through NLA_POLICY_MASK
- Patch #3:
    - Set err on nexthop_create_group() error path
- Patch #4:
    - Use uint to encode NHA_GROUP_STATS_ENTRY_PACKETS
    - Rename jump target in nla_put_nh_group_stats() to avoid
      having to rename further in the patchset.
- Patch #7:
    - Use uint to encode NHA_GROUP_STATS_ENTRY_PACKETS_HW
    - Do not cancel outside of nesting in nla_put_nh_group_stats()
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3b43f19d 5072ae00
@@ -47,6 +47,8 @@ struct nh_config {
         bool nh_grp_res_has_idle_timer;
         bool nh_grp_res_has_unbalanced_timer;
+        bool nh_hw_stats;
         struct nlattr *nh_encap;
         u16 nh_encap_type;
@@ -95,8 +97,14 @@ struct nh_res_table {
         struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
 };
+
+struct nh_grp_entry_stats {
+        u64_stats_t packets;
+        struct u64_stats_sync syncp;
+};
+
 struct nh_grp_entry {
         struct nexthop *nh;
+        struct nh_grp_entry_stats __percpu *stats;
         u8 weight;
         union {
@@ -114,6 +122,7 @@ struct nh_grp_entry {
         struct list_head nh_list;
         struct nexthop *nh_parent;  /* nexthop of group with this entry */
+        u64 packets_hw;
 };
 struct nh_group {
@@ -124,6 +133,7 @@ struct nh_group {
         bool resilient;
         bool fdb_nh;
         bool has_v4;
+        bool hw_stats;
         struct nh_res_table __rcu *res_table;
         struct nh_grp_entry nh_entries[] __counted_by(num_nh);
@@ -157,6 +167,7 @@ enum nexthop_event_type {
         NEXTHOP_EVENT_REPLACE,
         NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
         NEXTHOP_EVENT_BUCKET_REPLACE,
+        NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
 };
 enum nh_notifier_info_type {
@@ -164,6 +175,7 @@ enum nh_notifier_info_type {
         NH_NOTIFIER_INFO_TYPE_GRP,
         NH_NOTIFIER_INFO_TYPE_RES_TABLE,
         NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+        NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
 };
 struct nh_notifier_single_info {
@@ -187,6 +199,7 @@ struct nh_notifier_grp_entry_info {
 struct nh_notifier_grp_info {
         u16 num_nh;
         bool is_fdb;
+        bool hw_stats;
         struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
 };
@@ -200,9 +213,21 @@ struct nh_notifier_res_bucket_info {
 struct nh_notifier_res_table_info {
         u16 num_nh_buckets;
+        bool hw_stats;
         struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
 };
+
+struct nh_notifier_grp_hw_stats_entry_info {
+        u32 id;
+        u64 packets;
+};
+
+struct nh_notifier_grp_hw_stats_info {
+        u16 num_nh;
+        bool hw_stats_used;
+        struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
+};
+
 struct nh_notifier_info {
         struct net *net;
         struct netlink_ext_ack *extack;
@@ -213,6 +238,7 @@ struct nh_notifier_info {
                 struct nh_notifier_grp_info *nh_grp;
                 struct nh_notifier_res_table_info *nh_res_table;
                 struct nh_notifier_res_bucket_info *nh_res_bucket;
+                struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
         };
 };
@@ -225,6 +251,9 @@ void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
                                  bool offload, bool trap);
 void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
                                      unsigned long *activity);
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+                                  unsigned int nh_idx,
+                                  u64 delta_packets);
 /* caller is holding rcu or rtnl; no reference taken to nexthop */
 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
...
@@ -30,6 +30,9 @@ enum {
 #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
 
+#define NHA_OP_FLAG_DUMP_STATS BIT(0)
+#define NHA_OP_FLAG_DUMP_HW_STATS BIT(1)
+
 enum {
         NHA_UNSPEC,
         NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
@@ -60,6 +63,18 @@ enum {
         /* nested; nexthop bucket attributes */
         NHA_RES_BUCKET,
+
+        /* u32; operation-specific flags */
+        NHA_OP_FLAGS,
+
+        /* nested; nexthop group stats */
+        NHA_GROUP_STATS,
+
+        /* u32; nexthop hardware stats enable */
+        NHA_HW_STATS_ENABLE,
+
+        /* u32; read-only; whether any driver collects HW stats */
+        NHA_HW_STATS_USED,
+
         __NHA_MAX,
 };
@@ -101,4 +116,34 @@ enum {
 #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1)
 
+enum {
+        NHA_GROUP_STATS_UNSPEC,
+
+        /* nested; nexthop group entry stats */
+        NHA_GROUP_STATS_ENTRY,
+
+        __NHA_GROUP_STATS_MAX,
+};
+
+#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1)
+
+enum {
+        NHA_GROUP_STATS_ENTRY_UNSPEC,
+
+        /* u32; nexthop id of the nexthop group entry */
+        NHA_GROUP_STATS_ENTRY_ID,
+
+        /* uint; number of packets forwarded via the nexthop group entry */
+        NHA_GROUP_STATS_ENTRY_PACKETS,
+
+        /* uint; number of packets forwarded via the nexthop group entry in
+         * hardware
+         */
+        NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+
+        __NHA_GROUP_STATS_ENTRY_MAX,
+};
+
+#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1)
+
 #endif
@@ -26,6 +26,9 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
 #define NH_DEV_HASHBITS  8
 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
 
+#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \
+                               NHA_OP_FLAG_DUMP_HW_STATS)
+
 static const struct nla_policy rtm_nh_policy_new[] = {
         [NHA_ID] = { .type = NLA_U32 },
         [NHA_GROUP] = { .type = NLA_BINARY },
@@ -37,10 +40,17 @@ static const struct nla_policy rtm_nh_policy_new[] = {
         [NHA_ENCAP] = { .type = NLA_NESTED },
         [NHA_FDB] = { .type = NLA_FLAG },
         [NHA_RES_GROUP] = { .type = NLA_NESTED },
+        [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true),
 };
 
 static const struct nla_policy rtm_nh_policy_get[] = {
         [NHA_ID] = { .type = NLA_U32 },
+        [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+                                         NHA_OP_FLAGS_DUMP_ALL),
+};
+
+static const struct nla_policy rtm_nh_policy_del[] = {
+        [NHA_ID] = { .type = NLA_U32 },
 };
 
 static const struct nla_policy rtm_nh_policy_dump[] = {
@@ -48,6 +58,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
         [NHA_GROUPS] = { .type = NLA_FLAG },
         [NHA_MASTER] = { .type = NLA_U32 },
         [NHA_FDB] = { .type = NLA_FLAG },
+        [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+                                         NHA_OP_FLAGS_DUMP_ALL),
 };
 
 static const struct nla_policy rtm_nh_res_policy_new[] = {
@@ -131,6 +143,7 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
         info->nh_grp->num_nh = num_nh;
         info->nh_grp->is_fdb = nhg->fdb_nh;
+        info->nh_grp->hw_stats = nhg->hw_stats;
 
         for (i = 0; i < num_nh; i++) {
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
@@ -162,6 +175,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
                 return -ENOMEM;
 
         info->nh_res_table->num_nh_buckets = num_nh_buckets;
+        info->nh_res_table->hw_stats = nhg->hw_stats;
 
         for (i = 0; i < num_nh_buckets; i++) {
                 struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
@@ -474,6 +488,7 @@ static void nexthop_free_group(struct nexthop *nh)
                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 
                 WARN_ON(!list_empty(&nhge->nh_list));
+                free_percpu(nhge->stats);
                 nexthop_put(nhge->nh);
         }
@@ -654,8 +669,201 @@ static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg)
         return -EMSGSIZE;
 }
 
-static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge)
+{
+        struct nh_grp_entry_stats *cpu_stats;
+
+        cpu_stats = this_cpu_ptr(nhge->stats);
+        u64_stats_update_begin(&cpu_stats->syncp);
+        u64_stats_inc(&cpu_stats->packets);
+        u64_stats_update_end(&cpu_stats->syncp);
+}
+
+static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge,
+                                    u64 *ret_packets)
+{
+        int i;
+
+        *ret_packets = 0;
+
+        for_each_possible_cpu(i) {
+                struct nh_grp_entry_stats *cpu_stats;
+                unsigned int start;
+                u64 packets;
+
+                cpu_stats = per_cpu_ptr(nhge->stats, i);
+                do {
+                        start = u64_stats_fetch_begin(&cpu_stats->syncp);
+                        packets = u64_stats_read(&cpu_stats->packets);
+                } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+                *ret_packets += packets;
+        }
+}
+
+static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info,
+                                         const struct nexthop *nh)
+{
+        struct nh_group *nhg;
+        int i;
+
+        ASSERT_RTNL();
+        nhg = rtnl_dereference(nh->nh_grp);
+
+        info->id = nh->id;
+        info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS;
+        info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats,
+                                                    stats, nhg->num_nh),
+                                        GFP_KERNEL);
+        if (!info->nh_grp_hw_stats)
+                return -ENOMEM;
+
+        info->nh_grp_hw_stats->num_nh = nhg->num_nh;
+        for (i = 0; i < nhg->num_nh; i++) {
+                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+                info->nh_grp_hw_stats->stats[i].id = nhge->nh->id;
+        }
+
+        return 0;
+}
+
+static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info)
+{
+        kfree(info->nh_grp_hw_stats);
+}
+
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+                                  unsigned int nh_idx,
+                                  u64 delta_packets)
+{
+        info->hw_stats_used = true;
+        info->stats[nh_idx].packets += delta_packets;
+}
+EXPORT_SYMBOL(nh_grp_hw_stats_report_delta);
+
+static void nh_grp_hw_stats_apply_update(struct nexthop *nh,
+                                         struct nh_notifier_info *info)
+{
+        struct nh_group *nhg;
+        int i;
+
+        ASSERT_RTNL();
+        nhg = rtnl_dereference(nh->nh_grp);
+
+        for (i = 0; i < nhg->num_nh; i++) {
+                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+                nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets;
+        }
+}
+
+static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
+{
+        struct nh_notifier_info info = {
+                .net = nh->net,
+        };
+        struct net *net = nh->net;
+        int err;
+
+        if (nexthop_notifiers_is_empty(net))
+                return 0;
+
+        err = nh_notifier_grp_hw_stats_init(&info, nh);
+        if (err)
+                return err;
+
+        err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+                                           NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
+                                           &info);
+
+        /* Cache whatever we got, even if there was an error, otherwise the
+         * successful stats retrievals would get lost.
+         */
+        nh_grp_hw_stats_apply_update(nh, &info);
+        *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used;
+
+        nh_notifier_grp_hw_stats_fini(&info);
+        return notifier_to_errno(err);
+}
+
+static int nla_put_nh_group_stats_entry(struct sk_buff *skb,
+                                        struct nh_grp_entry *nhge,
+                                        u32 op_flags)
+{
+        struct nlattr *nest;
+        u64 packets;
+
+        nh_grp_entry_stats_read(nhge, &packets);
+
+        nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY);
+        if (!nest)
+                return -EMSGSIZE;
+
+        if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) ||
+            nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS,
+                         packets + nhge->packets_hw))
+                goto nla_put_failure;
+
+        if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+            nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW,
+                         nhge->packets_hw))
+                goto nla_put_failure;
+
+        nla_nest_end(skb, nest);
+        return 0;
+
+nla_put_failure:
+        nla_nest_cancel(skb, nest);
+        return -EMSGSIZE;
+}
+
+static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh,
+                                  u32 op_flags)
+{
+        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+        struct nlattr *nest;
+        bool hw_stats_used;
+        int err;
+        int i;
+
+        if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats))
+                goto err_out;
+
+        if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS &&
+            nhg->hw_stats) {
+                err = nh_grp_hw_stats_update(nh, &hw_stats_used);
+                if (err)
+                        goto out;
+
+                if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used))
+                        goto err_out;
+        }
+
+        nest = nla_nest_start(skb, NHA_GROUP_STATS);
+        if (!nest)
+                goto err_out;
+
+        for (i = 0; i < nhg->num_nh; i++)
+                if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i],
+                                                 op_flags))
+                        goto cancel_out;
+
+        nla_nest_end(skb, nest);
+        return 0;
+
+cancel_out:
+        nla_nest_cancel(skb, nest);
+err_out:
+        err = -EMSGSIZE;
+out:
+        return err;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
+                            u32 op_flags)
 {
+        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
         struct nexthop_grp *p;
         size_t len = nhg->num_nh * sizeof(*p);
         struct nlattr *nla;
@@ -684,6 +892,11 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
         if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
                 goto nla_put_failure;
 
+        if (op_flags & NHA_OP_FLAG_DUMP_STATS &&
+            (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) ||
+             nla_put_nh_group_stats(skb, nh, op_flags)))
+                goto nla_put_failure;
+
         return 0;
 
 nla_put_failure:
@@ -691,7 +904,8 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 }
 
 static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
-                        int event, u32 portid, u32 seq, unsigned int nlflags)
+                        int event, u32 portid, u32 seq, unsigned int nlflags,
+                        u32 op_flags)
 {
         struct fib6_nh *fib6_nh;
         struct fib_nh *fib_nh;
@@ -718,7 +932,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
                 if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
                         goto nla_put_failure;
 
-                if (nla_put_nh_group(skb, nhg))
+                if (nla_put_nh_group(skb, nh, op_flags))
                         goto nla_put_failure;
                 goto out;
         }
@@ -849,7 +1063,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
         if (!skb)
                 goto errout;
 
-        err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+        err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0);
         if (err < 0) {
                 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
                 WARN_ON(err == -EMSGSIZE);
@@ -1104,6 +1318,7 @@ static int nh_check_attr_group(struct net *net,
                 if (!tb[i])
                         continue;
                 switch (i) {
+                case NHA_HW_STATS_ENABLE:
                 case NHA_FDB:
                         continue;
                 case NHA_RES_GROUP:
@@ -1176,6 +1391,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
                 if (hash > atomic_read(&nhge->hthr.upper_bound))
                         continue;
 
+                nh_grp_entry_stats_inc(nhge);
                 return nhge->nh;
         }
 
@@ -1185,7 +1401,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
 
 static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
 {
-        struct nexthop *rc = NULL;
+        struct nh_grp_entry *nhge0 = NULL;
         int i;
 
         if (nhg->fdb_nh)
@@ -1200,16 +1416,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
                 if (!nexthop_is_good_nh(nhge->nh))
                         continue;
 
-                if (!rc)
-                        rc = nhge->nh;
+                if (!nhge0)
+                        nhge0 = nhge;
 
                 if (hash > atomic_read(&nhge->hthr.upper_bound))
                         continue;
 
+                nh_grp_entry_stats_inc(nhge);
                 return nhge->nh;
         }
 
-        return rc ? : nhg->nh_entries[0].nh;
+        if (!nhge0)
+                nhge0 = &nhg->nh_entries[0];
+
+        nh_grp_entry_stats_inc(nhge0);
+        return nhge0->nh;
 }
 
 static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -1225,6 +1445,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
         bucket = &res_table->nh_buckets[bucket_index];
         nh_res_bucket_set_busy(bucket);
         nhge = rcu_dereference(bucket->nh_entry);
+        nh_grp_entry_stats_inc(nhge);
         return nhge->nh;
 }
@@ -1798,6 +2019,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
                         newg->has_v4 = true;
 
                 list_del(&nhges[i].nh_list);
+                new_nhges[j].stats = nhges[i].stats;
                 new_nhges[j].nh_parent = nhges[i].nh_parent;
                 new_nhges[j].nh = nhges[i].nh;
                 new_nhges[j].weight = nhges[i].weight;
@@ -1813,6 +2035,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
         rcu_assign_pointer(nhp->nh_grp, newg);
 
         list_del(&nhge->nh_list);
+        free_percpu(nhge->stats);
         nexthop_put(nhge->nh);
 
         /* Removal of a NH from a resilient group is notified through
@@ -2477,6 +2700,13 @@ static struct nexthop *nexthop_create_group(struct net *net,
                 if (nhi->family == AF_INET)
                         nhg->has_v4 = true;
 
+                nhg->nh_entries[i].stats =
+                        netdev_alloc_pcpu_stats(struct nh_grp_entry_stats);
+                if (!nhg->nh_entries[i].stats) {
+                        err = -ENOMEM;
+                        nexthop_put(nhe);
+                        goto out_no_nh;
+                }
                 nhg->nh_entries[i].nh = nhe;
                 nhg->nh_entries[i].weight = entry[i].weight + 1;
                 list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
@@ -2509,6 +2739,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
         if (cfg->nh_fdb)
                 nhg->fdb_nh = 1;
 
+        if (cfg->nh_hw_stats)
+                nhg->hw_stats = true;
+
         rcu_assign_pointer(nh->nh_grp, nhg);
 
         return nh;
@@ -2516,6 +2749,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 out_no_nh:
         for (i--; i >= 0; --i) {
                 list_del(&nhg->nh_entries[i].nh_list);
+                free_percpu(nhg->nh_entries[i].stats);
                 nexthop_put(nhg->nh_entries[i].nh);
         }
@@ -2850,6 +3084,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                 err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
                                                cfg, extack);
 
+                if (tb[NHA_HW_STATS_ENABLE])
+                        cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]);
+
                 /* no other attributes should be set */
                 goto out;
         }
@@ -2941,6 +3178,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
                 goto out;
         }
 
+        if (tb[NHA_HW_STATS_ENABLE]) {
+                NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops");
+                goto out;
+        }
+
         err = 0;
 out:
@@ -2966,9 +3207,9 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
         return err;
 }
 
-static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
-                                  struct nlattr **tb, u32 *id,
-                                  struct netlink_ext_ack *extack)
+static int nh_valid_get_del_req(const struct nlmsghdr *nlh,
+                                struct nlattr **tb, u32 *id, u32 *op_flags,
+                                struct netlink_ext_ack *extack)
 {
         struct nhmsg *nhm = nlmsg_data(nlh);
 
@@ -2988,22 +3229,12 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
                 return -EINVAL;
         }
 
-        return 0;
-}
-
-static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
-                                struct netlink_ext_ack *extack)
-{
-        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
-        int err;
-
-        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
-                          ARRAY_SIZE(rtm_nh_policy_get) - 1,
-                          rtm_nh_policy_get, extack);
-        if (err < 0)
-                return err;
+        if (tb[NHA_OP_FLAGS])
+                *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+        else
+                *op_flags = 0;
 
-        return __nh_valid_get_del_req(nlh, tb, id, extack);
+        return 0;
 }
 
 /* rtnl */
@@ -3011,16 +3242,23 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
                            struct netlink_ext_ack *extack)
 {
         struct net *net = sock_net(skb->sk);
+        struct nlattr *tb[NHA_MAX + 1];
         struct nl_info nlinfo = {
                 .nlh = nlh,
                 .nl_net = net,
                 .portid = NETLINK_CB(skb).portid,
         };
         struct nexthop *nh;
+        u32 op_flags;
         int err;
         u32 id;
 
-        err = nh_valid_get_del_req(nlh, &id, extack);
+        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX,
+                          rtm_nh_policy_del, extack);
+        if (err < 0)
+                return err;
+
+        err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack);
         if (err)
                 return err;
 
@@ -3038,12 +3276,19 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                            struct netlink_ext_ack *extack)
 {
         struct net *net = sock_net(in_skb->sk);
+        struct nlattr *tb[NHA_MAX + 1];
         struct sk_buff *skb = NULL;
         struct nexthop *nh;
+        u32 op_flags;
         int err;
         u32 id;
 
-        err = nh_valid_get_del_req(nlh, &id, extack);
+        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX,
+                          rtm_nh_policy_get, extack);
+        if (err < 0)
+                return err;
+
+        err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack);
         if (err)
                 return err;
 
@@ -3058,7 +3303,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                 goto errout_free;
 
         err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
-                           nlh->nlmsg_seq, 0);
+                           nlh->nlmsg_seq, 0, op_flags);
         if (err < 0) {
                 WARN_ON(err == -EMSGSIZE);
                 goto errout_free;
@@ -3079,6 +3324,7 @@ struct nh_dump_filter {
         bool group_filter;
         bool fdb_filter;
         u32 res_bucket_nh_id;
+        u32 op_flags;
 };
 
 static bool nh_dump_filtered(struct nexthop *nh,
@@ -3150,6 +3396,11 @@ static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
                 return -EINVAL;
         }
 
+        if (tb[NHA_OP_FLAGS])
+                filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]);
+        else
+                filter->op_flags = 0;
+
         return 0;
 }
@@ -3157,11 +3408,10 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
                              struct nh_dump_filter *filter,
                              struct netlink_callback *cb)
 {
-        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
+        struct nlattr *tb[NHA_MAX + 1];
         int err;
 
-        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
-                          ARRAY_SIZE(rtm_nh_policy_dump) - 1,
+        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX,
                           rtm_nh_policy_dump, cb->extack);
         if (err < 0)
                 return err;
@@ -3223,7 +3473,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
         return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
                             NETLINK_CB(cb->skb).portid,
-                            cb->nlh->nlmsg_seq, NLM_F_MULTI);
+                            cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags);
 }
 
 /* rtnl */
@@ -3300,11 +3550,10 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
                                     struct netlink_callback *cb)
 {
         struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
-        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
+        struct nlattr *tb[NHA_MAX + 1];
         int err;
 
-        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
-                          ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
+        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX,
                           rtm_nh_policy_dump_bucket, NULL);
         if (err < 0)
                 return err;
@@ -3474,16 +3723,16 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
                                    u32 *id, u16 *bucket_index,
                                    struct netlink_ext_ack *extack)
 {
-        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)];
+        struct nlattr *tb[NHA_MAX + 1];
+        u32 op_flags;
         int err;
 
-        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
-                          ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1,
+        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX,
                           rtm_nh_policy_get_bucket, extack);
         if (err < 0)
                 return err;
 
-        err = __nh_valid_get_del_req(nlh, tb, id, extack);
+        err = nh_valid_get_del_req(nlh, tb, id, &op_flags, extack);
         if (err)
                 return err;
...