Commit 870c2481 authored by Moshe Shemesh's avatar Moshe Shemesh Committed by Saeed Mahameed

net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint

While moving to new CMD API (quiet API), some pre-existing flows may call the new API
function that in case of error, returns the error instead of printing it as previously done.
For such flows we bring back the print but to tracepoint this time for sys admins to
have the ability to check for errors especially for commands using the new quiet API.

Tracepoint output example:
         devlink-1333    [001] .....   822.746922: mlx5_cmd: ACCESS_REG(0x805) op_mod(0x0) failed, status bad resource(0x5), syndrome (0xb06e1f), err(-22)

Fixes: f23519e5 ("net/mlx5: cmdif, Add new api for command execution")
Signed-off-by: default avatarMoshe Shemesh <moshe@nvidia.com>
Reviewed-by: default avatarShay Drory <shayd@nvidia.com>
Reviewed-by: default avatarMaor Gottlieb <maorg@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 4f57332d
......@@ -45,6 +45,8 @@
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/tout.h"
#define CREATE_TRACE_POINTS
#include "diag/cmd_tracepoint.h"
enum {
CMD_IF_REV = 5,
......@@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err);
static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
{
u16 opcode, op_mod;
u32 syndrome;
u8 status;
u16 uid;
int err;
syndrome = MLX5_GET(mbox_out, out, syndrome);
status = MLX5_GET(mbox_out, out, status);
opcode = MLX5_GET(mbox_in, in, opcode);
op_mod = MLX5_GET(mbox_in, in, op_mod);
uid = MLX5_GET(mbox_in, in, uid);
err = cmd_status_to_err(status);
if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
mlx5_cmd_out_err(dev, opcode, op_mod, out);
else
mlx5_core_dbg(dev,
"%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
mlx5_command_str(opcode), opcode, op_mod, uid,
cmd_status_str(status), status, syndrome, err);
}
int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
......@@ -1892,6 +1881,16 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
return err;
}
static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
{
u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
u8 status = MLX5_GET(mbox_out, out, status);
trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod,
cmd_status_str(status), status, syndrome,
cmd_status_to_err(status));
}
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
......@@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
}
/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out)
{
u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
u8 status = MLX5_GET(mbox_out, out, status);
......@@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *
if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
err = -EIO;
if (!err && status != MLX5_CMD_STAT_OK)
if (!err && status != MLX5_CMD_STAT_OK) {
err = -EREMOTEIO;
mlx5_cmd_err_trace(dev, opcode, op_mod, out);
}
cmd_status_log(dev, opcode, status, syndrome, err);
return err;
......@@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
err = cmd_status_err(dev, err, opcode, out);
return err;
return cmd_status_err(dev, err, opcode, op_mod, out);
}
EXPORT_SYMBOL(mlx5_cmd_do);
......@@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
err = cmd_status_err(dev, err, opcode, out);
err = cmd_status_err(dev, err, opcode, op_mod, out);
return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec_polling);
......@@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
struct mlx5_async_ctx *ctx;
ctx = work->ctx;
status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
complete(&ctx->inflight_done);
......@@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
work->ctx = ctx;
work->user_callback = callback;
work->opcode = MLX5_GET(mbox_in, in, opcode);
work->op_mod = MLX5_GET(mbox_in, in, op_mod);
work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;
......
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mlx5
#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _MLX5_CMD_TP_H_
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
TRACE_EVENT(mlx5_cmd,
TP_PROTO(const char *command_str, u16 opcode, u16 op_mod,
const char *status_str, u8 status, u32 syndrome, int err),
TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err),
TP_STRUCT__entry(__string(command_str, command_str)
__field(u16, opcode)
__field(u16, op_mod)
__string(status_str, status_str)
__field(u8, status)
__field(u32, syndrome)
__field(int, err)
),
TP_fast_assign(__assign_str(command_str, command_str);
__entry->opcode = opcode;
__entry->op_mod = op_mod;
__assign_str(status_str, status_str);
__entry->status = status;
__entry->syndrome = syndrome;
__entry->err = err;
),
TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)",
__get_str(command_str), __entry->opcode, __entry->op_mod,
__get_str(status_str), __entry->status, __entry->syndrome,
__entry->err)
);
#endif /* _MLX5_CMD_TP_H_ */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ./diag
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE cmd_tracepoint
#include <trace/define_trace.h>
......@@ -981,6 +981,7 @@ struct mlx5_async_work {
struct mlx5_async_ctx *ctx;
mlx5_async_cbk_t user_callback;
u16 opcode; /* cmd opcode */
u16 op_mod; /* cmd op_mod */
void *out; /* pointer to the cmd output buffer */
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment