Commit 307b4e0b authored by David S. Miller

Merge branch 'Devlink-health-auto-attributes-refactor'

Eran Ben Elisha says:

====================
Devlink health auto attributes refactor

This patchset refactors the auto-recover health reporter flag to be
explicitly set by the devlink core.
In addition, add another flag to control auto-dump attribute, also
to be explicitly set by the devlink core.

For that, patch 0001 changes the auto-recover default value of
netdevsim dummy reporter.

After reporter registration, both flags can be altered by the administrator
only.

Changes since v1:
- Change default behaviour of netdevsim dummy reporter
- Move initialization of DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 62582a7e 48bb52c8
...@@ -150,7 +150,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp) ...@@ -150,7 +150,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_reset_reporter = health->fw_reset_reporter =
devlink_health_reporter_create(bp->dl, devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_reset_reporter_ops, &bnxt_dl_fw_reset_reporter_ops,
0, true, bp); 0, bp);
if (IS_ERR(health->fw_reset_reporter)) { if (IS_ERR(health->fw_reset_reporter)) {
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n", netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_reset_reporter)); PTR_ERR(health->fw_reset_reporter));
...@@ -166,7 +166,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp) ...@@ -166,7 +166,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_reporter = health->fw_reporter =
devlink_health_reporter_create(bp->dl, devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_reporter_ops, &bnxt_dl_fw_reporter_ops,
0, false, bp); 0, bp);
if (IS_ERR(health->fw_reporter)) { if (IS_ERR(health->fw_reporter)) {
netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n", netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
PTR_ERR(health->fw_reporter)); PTR_ERR(health->fw_reporter));
...@@ -182,7 +182,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp) ...@@ -182,7 +182,7 @@ void bnxt_dl_fw_reporters_create(struct bnxt *bp)
health->fw_fatal_reporter = health->fw_fatal_reporter =
devlink_health_reporter_create(bp->dl, devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_fatal_reporter_ops, &bnxt_dl_fw_fatal_reporter_ops,
0, true, bp); 0, bp);
if (IS_ERR(health->fw_fatal_reporter)) { if (IS_ERR(health->fw_fatal_reporter)) {
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n", netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_fatal_reporter)); PTR_ERR(health->fw_fatal_reporter));
......
...@@ -571,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) ...@@ -571,7 +571,7 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
reporter = devlink_health_reporter_create(devlink, reporter = devlink_health_reporter_create(devlink,
&mlx5_rx_reporter_ops, &mlx5_rx_reporter_ops,
MLX5E_REPORTER_RX_GRACEFUL_PERIOD, MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
true, priv); priv);
if (IS_ERR(reporter)) { if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
PTR_ERR(reporter)); PTR_ERR(reporter));
......
...@@ -416,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) ...@@ -416,7 +416,7 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
reporter = reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD, MLX5_REPORTER_TX_GRACEFUL_PERIOD,
true, priv); priv);
if (IS_ERR(reporter)) { if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n", "Failed to create tx reporter, err = %ld\n",
......
...@@ -627,7 +627,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) ...@@ -627,7 +627,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
health->fw_reporter = health->fw_reporter =
devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
0, false, dev); 0, dev);
if (IS_ERR(health->fw_reporter)) if (IS_ERR(health->fw_reporter))
mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
PTR_ERR(health->fw_reporter)); PTR_ERR(health->fw_reporter));
...@@ -636,7 +636,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) ...@@ -636,7 +636,7 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
devlink_health_reporter_create(devlink, devlink_health_reporter_create(devlink,
&mlx5_fw_fatal_reporter_ops, &mlx5_fw_fatal_reporter_ops,
MLX5_REPORTER_FW_GRACEFUL_PERIOD, MLX5_REPORTER_FW_GRACEFUL_PERIOD,
true, dev); dev);
if (IS_ERR(health->fw_fatal_reporter)) if (IS_ERR(health->fw_fatal_reporter))
mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
PTR_ERR(health->fw_fatal_reporter)); PTR_ERR(health->fw_fatal_reporter));
......
...@@ -271,14 +271,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) ...@@ -271,14 +271,14 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
health->empty_reporter = health->empty_reporter =
devlink_health_reporter_create(devlink, devlink_health_reporter_create(devlink,
&nsim_dev_empty_reporter_ops, &nsim_dev_empty_reporter_ops,
0, false, health); 0, health);
if (IS_ERR(health->empty_reporter)) if (IS_ERR(health->empty_reporter))
return PTR_ERR(health->empty_reporter); return PTR_ERR(health->empty_reporter);
health->dummy_reporter = health->dummy_reporter =
devlink_health_reporter_create(devlink, devlink_health_reporter_create(devlink,
&nsim_dev_dummy_reporter_ops, &nsim_dev_dummy_reporter_ops,
0, false, health); 0, health);
if (IS_ERR(health->dummy_reporter)) { if (IS_ERR(health->dummy_reporter)) {
err = PTR_ERR(health->dummy_reporter); err = PTR_ERR(health->dummy_reporter);
goto err_empty_reporter_destroy; goto err_empty_reporter_destroy;
......
...@@ -1040,8 +1040,7 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, ...@@ -1040,8 +1040,7 @@ int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
struct devlink_health_reporter * struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink, devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops, const struct devlink_health_reporter_ops *ops,
u64 graceful_period, bool auto_recover, u64 graceful_period, void *priv);
void *priv);
void void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
......
...@@ -429,6 +429,8 @@ enum devlink_attr { ...@@ -429,6 +429,8 @@ enum devlink_attr {
DEVLINK_ATTR_NETNS_FD, /* u32 */ DEVLINK_ATTR_NETNS_FD, /* u32 */
DEVLINK_ATTR_NETNS_PID, /* u32 */ DEVLINK_ATTR_NETNS_PID, /* u32 */
DEVLINK_ATTR_NETNS_ID, /* u32 */ DEVLINK_ATTR_NETNS_ID, /* u32 */
DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */
/* add new attributes above here, update the policy in devlink.c */ /* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX, __DEVLINK_ATTR_MAX,
......
...@@ -5089,6 +5089,7 @@ struct devlink_health_reporter { ...@@ -5089,6 +5089,7 @@ struct devlink_health_reporter {
struct mutex dump_lock; /* lock parallel read/write from dump buffers */ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period; u64 graceful_period;
bool auto_recover; bool auto_recover;
bool auto_dump;
u8 health_state; u8 health_state;
u64 dump_ts; u64 dump_ts;
u64 dump_real_ts; u64 dump_real_ts;
...@@ -5124,14 +5125,12 @@ devlink_health_reporter_find_by_name(struct devlink *devlink, ...@@ -5124,14 +5125,12 @@ devlink_health_reporter_find_by_name(struct devlink *devlink,
* @devlink: devlink * @devlink: devlink
* @ops: ops * @ops: ops
* @graceful_period: to avoid recovery loops, in msecs * @graceful_period: to avoid recovery loops, in msecs
* @auto_recover: auto recover when error occurs
* @priv: priv * @priv: priv
*/ */
struct devlink_health_reporter * struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink, devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops, const struct devlink_health_reporter_ops *ops,
u64 graceful_period, bool auto_recover, u64 graceful_period, void *priv)
void *priv)
{ {
struct devlink_health_reporter *reporter; struct devlink_health_reporter *reporter;
...@@ -5141,8 +5140,7 @@ devlink_health_reporter_create(struct devlink *devlink, ...@@ -5141,8 +5140,7 @@ devlink_health_reporter_create(struct devlink *devlink,
goto unlock; goto unlock;
} }
if (WARN_ON(auto_recover && !ops->recover) || if (WARN_ON(graceful_period && !ops->recover)) {
WARN_ON(graceful_period && !ops->recover)) {
reporter = ERR_PTR(-EINVAL); reporter = ERR_PTR(-EINVAL);
goto unlock; goto unlock;
} }
...@@ -5157,7 +5155,8 @@ devlink_health_reporter_create(struct devlink *devlink, ...@@ -5157,7 +5155,8 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->ops = ops; reporter->ops = ops;
reporter->devlink = devlink; reporter->devlink = devlink;
reporter->graceful_period = graceful_period; reporter->graceful_period = graceful_period;
reporter->auto_recover = auto_recover; reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
mutex_init(&reporter->dump_lock); mutex_init(&reporter->dump_lock);
refcount_set(&reporter->refcount, 1); refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list); list_add_tail(&reporter->list, &devlink->reporter_list);
...@@ -5238,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, ...@@ -5238,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
reporter->dump_real_ts, DEVLINK_ATTR_PAD)) reporter->dump_real_ts, DEVLINK_ATTR_PAD))
goto reporter_nest_cancel; goto reporter_nest_cancel;
if (reporter->ops->dump &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
reporter->auto_dump))
goto reporter_nest_cancel;
nla_nest_end(msg, reporter_attr); nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr); genlmsg_end(msg, hdr);
...@@ -5384,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, ...@@ -5384,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
if (reporter->auto_dump) {
mutex_lock(&reporter->dump_lock); mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */ /* store current dump of current error, for later analysis */
devlink_health_do_dump(reporter, priv_ctx, NULL); devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock); mutex_unlock(&reporter->dump_lock);
}
if (reporter->auto_recover) if (reporter->auto_recover)
return devlink_health_reporter_recover(reporter, return devlink_health_reporter_recover(reporter,
...@@ -5561,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, ...@@ -5561,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto out; goto out;
} }
if (!reporter->ops->dump &&
info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
err = -EOPNOTSUPP;
goto out;
}
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period = reporter->graceful_period =
...@@ -5570,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, ...@@ -5570,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_recover = reporter->auto_recover =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
reporter->auto_dump =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
devlink_health_reporter_put(reporter); devlink_health_reporter_put(reporter);
return 0; return 0;
out: out:
...@@ -6316,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { ...@@ -6316,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
}; };
static const struct genl_ops devlink_nl_ops[] = { static const struct genl_ops devlink_nl_ops[] = {
......
...@@ -377,6 +377,11 @@ dummy_reporter_test() ...@@ -377,6 +377,11 @@ dummy_reporter_test()
{ {
RET=0 RET=0
check_reporter_info dummy healthy 0 0 0 true
devlink health set $DL_HANDLE reporter dummy auto_recover false
check_err $? "Failed to dummy reporter auto_recover option"
check_reporter_info dummy healthy 0 0 0 false check_reporter_info dummy healthy 0 0 0 false
local BREAK_MSG="foo bar" local BREAK_MSG="foo bar"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment