Commit 6e2135ce authored by David S. Miller's avatar David S. Miller

Merge tag 'mlx5-updates-2018-03-27' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2018-03-27 (Misc updates & SQ recovery)

This series contains Misc updates and cleanups for mlx5e rx path
and SQ recovery feature for tx path.

From Tariq: (RX updates)
    - Disable Striding RQ when PCI devices, striding RQ limits the use
      of CQE compression feature, which is very critical for slow PCI
      devices performance, in this change we will prefer CQE compression
      over Striding RQ only on specific "slow"  PCIe links.
    - RX path cleanups
    - Private flag to enable/disable striding RQ

From Eran: (TX fast recovery)
    - TX timeout logic improvements, fast SQ recovery and TX error reporting
      if a HW error occurs while transmitting on a specific SQ, the driver will
      ignore such error and will wait for TX timeout to occur and reset all
      the rings. Instead, the current series improves the resiliency for such
      HW errors by detecting TX completions with errors, which will report them
      and perform a fast recover for the specific faulty SQ even before a TX
      timeout is detected.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 038d49ba db75373c
......@@ -267,14 +267,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
__be32 *p = (__be32 *)cqe;
int i;
mlx5_ib_warn(dev, "dump error cqe\n");
for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
be32_to_cpu(p[1]), be32_to_cpu(p[2]),
be32_to_cpu(p[3]));
mlx5_dump_err_cqe(dev->mdev, cqe);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
......
......@@ -4739,26 +4739,14 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u8 *sq_state)
{
void *out;
void *sqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(query_sq_out);
out = kvzalloc(inlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
if (err)
goto out;
sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
*sq_state = MLX5_GET(sqc, sqc, state);
sq->state = *sq_state;
out:
kvfree(out);
return err;
}
......
......@@ -93,8 +93,6 @@
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \
MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
#define MLX5E_REQUIRED_MTTS(wqes) \
......@@ -124,6 +122,7 @@
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
#define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
#define MLX5E_ICOSQ_MAX_WQEBBS \
(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
......@@ -207,12 +206,14 @@ static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
"rx_cqe_moder",
"tx_cqe_moder",
"rx_cqe_compress",
"rx_striding_rq",
};
enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
};
#define MLX5E_SET_PFLAG(params, pflag, enable) \
......@@ -232,9 +233,6 @@ enum mlx5e_priv_flag {
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
u16 rq_headroom;
u8 mpwqe_log_stride_sz;
u8 mpwqe_log_num_strides;
u8 log_rq_size;
u16 num_channels;
u8 num_tc;
......@@ -243,7 +241,6 @@ struct mlx5e_params {
struct net_dim_cq_moder tx_cq_moderation;
bool lro_en;
u32 lro_wqe_sz;
u16 tx_max_inline;
u8 tx_min_inline_mode;
u8 rss_hfunc;
u8 toeplitz_hash_key[40];
......@@ -336,6 +333,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
};
......@@ -369,7 +367,6 @@ struct mlx5e_txqsq {
void __iomem *uar_map;
struct netdev_queue *txq;
u32 sqn;
u16 max_inline;
u8 min_inline_mode;
u16 edge;
struct device *pdev;
......@@ -383,6 +380,10 @@ struct mlx5e_txqsq {
struct mlx5e_channel *channel;
int txq_ix;
u32 rate_limit;
struct mlx5e_txqsq_recover {
struct work_struct recover_work;
u64 last_recover;
} recover;
} ____cacheline_aligned_in_smp;
struct mlx5e_xdpsq {
......@@ -832,6 +833,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
bool recycle);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
......@@ -842,6 +847,11 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_update_stats(struct mlx5e_priv *priv);
int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
......@@ -917,9 +927,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
u8 cq_period_mode);
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
u8 cq_period_mode);
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
u8 rq_type);
struct mlx5e_params *params);
static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
{
......@@ -1011,7 +1021,6 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#endif
u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
int mlx5e_create_tir(struct mlx5_core_dev *mdev,
struct mlx5e_tir *tir, u32 *in, int inlen);
void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
......
......@@ -231,8 +231,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return num_wqe;
stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
wqe_size = stride_size * num_strides;
packets_per_wqe = wqe_size /
......@@ -252,8 +252,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return num_packets;
stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
wqe_size = stride_size * num_strides;
num_packets = (1 << order_base_2(num_packets));
......@@ -1118,13 +1118,9 @@ static int mlx5e_get_tunable(struct net_device *dev,
const struct ethtool_tunable *tuna,
void *data)
{
const struct mlx5e_priv *priv = netdev_priv(dev);
int err = 0;
int err;
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
*(u32 *)data = priv->channels.params.tx_max_inline;
break;
case ETHTOOL_PFC_PREVENTION_TOUT:
err = mlx5e_get_pfc_prevention_tout(dev, data);
break;
......@@ -1141,35 +1137,11 @@ static int mlx5e_set_tunable(struct net_device *dev,
const void *data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err = 0;
u32 val;
int err;
mutex_lock(&priv->state_lock);
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
val = *(u32 *)data;
if (val > mlx5e_get_max_inline_cap(mdev)) {
err = -EINVAL;
break;
}
new_channels.params = priv->channels.params;
new_channels.params.tx_max_inline = val;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
break;
}
err = mlx5e_open_channels(priv, &new_channels);
if (err)
break;
mlx5e_switch_priv_channels(priv, &new_channels, NULL);
break;
case ETHTOOL_PFC_PREVENTION_TOUT:
err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
break;
......@@ -1561,11 +1533,6 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
new_channels.params.mpwqe_log_stride_sz =
MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
new_channels.params.mpwqe_log_num_strides =
MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
return 0;
......@@ -1603,6 +1570,38 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
return 0;
}
static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err;
if (enable) {
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
return -EOPNOTSUPP;
if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
return -EINVAL;
}
new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
mlx5e_set_rq_type(mdev, &new_channels.params);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
return 0;
}
err = mlx5e_open_channels(priv, &new_channels);
if (err)
return err;
mlx5e_switch_priv_channels(priv, &new_channels, NULL);
return 0;
}
static int mlx5e_handle_pflag(struct net_device *netdev,
u32 wanted_flags,
enum mlx5e_priv_flag flag,
......@@ -1648,6 +1647,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
err = mlx5e_handle_pflag(netdev, pflags,
MLX5E_PFLAG_RX_CQE_COMPRESS,
set_pflag_rx_cqe_compress);
if (err)
goto out;
err = mlx5e_handle_pflag(netdev, pflags,
MLX5E_PFLAG_RX_STRIDING_RQ,
set_pflag_rx_striding_rq);
out:
mutex_unlock(&priv->state_lock);
......
......@@ -71,56 +71,80 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};
static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_GEN(mdev, striding_rq) &&
MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
MLX5_CAP_ETH(mdev, reg_umr_sq);
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
return MLX5E_MPWQE_STRIDE_SZ(mdev,
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
}
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
return MLX5_MPWRQ_LOG_WQE_SZ -
mlx5e_mpwqe_get_log_stride_size(mdev, params);
}
static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params)
{
u16 linear_rq_headroom = params->xdp_prog ?
XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
linear_rq_headroom += NET_IP_ALIGN;
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)
return linear_rq_headroom;
return 0;
}
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params, u8 rq_type)
struct mlx5e_params *params)
{
params->rq_wq_type = rq_type;
params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
params->log_rq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
params->mpwqe_log_stride_sz;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
params->log_rq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
params->rq_headroom = params->xdp_prog ?
XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
params->rq_headroom += NET_IP_ALIGN;
/* Extra room needed for build_skb */
params->lro_wqe_sz -= params->rq_headroom +
params->lro_wqe_sz -= mlx5e_get_rq_headroom(params) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
BIT(params->log_rq_size),
BIT(params->mpwqe_log_stride_sz),
BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
}
static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
!params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
mlx5e_init_rq_type_params(mdev, params, rq_type);
return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
!params->xdp_prog && !MLX5_IPSEC_DEV(mdev);
}
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
}
static void mlx5e_update_carrier(struct mlx5e_priv *priv)
......@@ -153,26 +177,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
static void mlx5e_tx_timeout_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
tx_timeout_work);
int err;
rtnl_lock();
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
mlx5e_close_locked(priv->netdev);
err = mlx5e_open_locked(priv->netdev);
if (err)
netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
err);
unlock:
mutex_unlock(&priv->state_lock);
rtnl_unlock();
}
void mlx5e_update_stats(struct mlx5e_priv *priv)
{
int i;
......@@ -428,7 +432,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = params->rq_headroom;
rq->buff.headroom = mlx5e_get_rq_headroom(params);
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
......@@ -450,8 +454,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz;
rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
......@@ -952,6 +956,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
return 0;
}
static void mlx5e_sq_recover(struct work_struct *work);
static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int txq_ix,
struct mlx5e_params *params,
......@@ -969,8 +974,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->channel = c;
sq->txq_ix = txq_ix;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->max_inline = params->tx_max_inline;
sq->min_inline_mode = params->tx_min_inline_mode;
INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
......@@ -1037,6 +1042,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index);
......@@ -1155,9 +1161,20 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
return err;
}
static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
{
WARN_ONCE(sq->cc != sq->pc,
"SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
sq->sqn, sq->cc, sq->pc);
sq->cc = 0;
sq->dma_fifo_cc = 0;
sq->pc = 0;
}
static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
{
sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
netdev_tx_reset_queue(sq->txq);
netif_tx_start_queue(sq->txq);
......@@ -1202,6 +1219,107 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
mlx5e_free_txqsq(sq);
}
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
while (time_before(jiffies, exp_time)) {
if (sq->cc == sq->pc)
return 0;
msleep(20);
}
netdev_err(sq->channel->netdev,
"Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
sq->sqn, sq->cc, sq->pc);
return -ETIMEDOUT;
}
static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
{
struct mlx5_core_dev *mdev = sq->channel->mdev;
struct net_device *dev = sq->channel->netdev;
struct mlx5e_modify_sq_param msp = {0};
int err;
msp.curr_state = curr_state;
msp.next_state = MLX5_SQC_STATE_RST;
err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
if (err) {
netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
return err;
}
memset(&msp, 0, sizeof(msp));
msp.curr_state = MLX5_SQC_STATE_RST;
msp.next_state = MLX5_SQC_STATE_RDY;
err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
if (err) {
netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
return err;
}
return 0;
}
static void mlx5e_sq_recover(struct work_struct *work)
{
struct mlx5e_txqsq_recover *recover =
container_of(work, struct mlx5e_txqsq_recover,
recover_work);
struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
recover);
struct mlx5_core_dev *mdev = sq->channel->mdev;
struct net_device *dev = sq->channel->netdev;
u8 state;
int err;
err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
if (err) {
netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
sq->sqn, err);
return;
}
if (state != MLX5_RQC_STATE_ERR) {
netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
return;
}
netif_tx_disable_queue(sq->txq);
if (mlx5e_wait_for_sq_flush(sq))
return;
/* If the interval between two consecutive recovers per SQ is too
* short, don't recover to avoid infinite loop of ERR_CQE -> recover.
* If we reached this state, there is probably a bug that needs to be
* fixed. let's keep the queue close and let tx timeout cleanup.
*/
if (jiffies_to_msecs(jiffies - recover->last_recover) <
MLX5E_SQ_RECOVER_MIN_INTERVAL) {
netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
sq->sqn);
return;
}
/* At this point, no new packets will arrive from the stack as TXQ is
* marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
* pending WQEs. SQ can safely reset the SQ.
*/
if (mlx5e_sq_to_ready(sq, state))
return;
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats.recover++;
recover->last_recover = jiffies;
mlx5e_activate_txqsq(sq);
}
static int mlx5e_open_icosq(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_sq_param *param,
......@@ -1742,13 +1860,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_params *params,
struct mlx5e_rq_param *param)
{
struct mlx5_core_dev *mdev = priv->mdev;
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9);
MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6);
MLX5_SET(wq, wq, log_wqe_num_of_strides,
mlx5e_mpwqe_get_log_num_strides(mdev, params) - 9);
MLX5_SET(wq, wq, log_wqe_stride_size,
mlx5e_mpwqe_get_log_stride_size(mdev, params) - 6);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
......@@ -1758,12 +1879,12 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size);
MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn);
MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn);
MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
param->wq.linear = 1;
}
......@@ -1822,7 +1943,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides;
log_cq_size = params->log_rq_size +
mlx5e_mpwqe_get_log_num_strides(priv->mdev, params);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
log_cq_size = params->log_rq_size;
......@@ -3631,13 +3753,19 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
return true;
}
static void mlx5e_tx_timeout(struct net_device *dev)
static void mlx5e_tx_timeout_work(struct work_struct *work)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
tx_timeout_work);
struct net_device *dev = priv->netdev;
bool reopen_channels = false;
int i;
int i, err;
netdev_err(dev, "TX timeout detected\n");
rtnl_lock();
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
......@@ -3645,7 +3773,9 @@ static void mlx5e_tx_timeout(struct net_device *dev)
if (!netif_xmit_stopped(dev_queue))
continue;
netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
netdev_err(dev,
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
jiffies_to_usecs(jiffies - dev_queue->trans_start));
......@@ -3658,8 +3788,27 @@ static void mlx5e_tx_timeout(struct net_device *dev)
}
}
if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state))
schedule_work(&priv->tx_timeout_work);
if (!reopen_channels)
goto unlock;
mlx5e_close_locked(dev);
err = mlx5e_open_locked(dev);
if (err)
netdev_err(priv->netdev,
"mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
err);
unlock:
mutex_unlock(&priv->state_lock);
rtnl_unlock();
}
static void mlx5e_tx_timeout(struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
netdev_err(dev, "TX timeout detected\n");
queue_work(priv->wq, &priv->tx_timeout_work);
}
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
......@@ -3709,7 +3858,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
bpf_prog_put(old_prog);
if (reset) /* change RQ type according to priv->xdp_prog */
mlx5e_set_rq_params(priv->mdev, &priv->channels.params);
mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
if (was_opened && reset)
mlx5e_open_locked(netdev);
......@@ -3854,15 +4003,6 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
return 0;
}
u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
{
int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
return bf_buf_size -
sizeof(struct mlx5e_tx_wqe) +
2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
}
void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
int num_channels)
{
......@@ -3902,16 +4042,20 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
return 0;
}
static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
{
return (link_speed && pci_bw &&
(pci_bw < 40000) && (pci_bw < link_speed));
}
u32 link_speed = 0;
u32 pci_bw = 0;
static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
{
return !(link_speed && pci_bw &&
(pci_bw <= 16000) && (pci_bw < link_speed));
mlx5e_get_max_linkspeed(mdev, &link_speed);
mlx5e_get_pci_bw(mdev, &pci_bw);
mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
#define MLX5E_SLOW_PCI_RATIO (2)
return link_speed && pci_bw &&
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
......@@ -3980,17 +4124,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
u16 max_channels)
{
u8 cq_period_mode = 0;
u32 link_speed = 0;
u32 pci_bw = 0;
params->num_channels = max_channels;
params->num_tc = 1;
mlx5e_get_max_linkspeed(mdev, &link_speed);
mlx5e_get_pci_bw(mdev, &pci_bw);
mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
......@@ -4000,18 +4137,22 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->rx_cqe_compress_def = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
MLX5_CAP_GEN(mdev, vport_group_manager))
params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw);
params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
/* RQ */
mlx5e_set_rq_params(mdev, params);
if (mlx5e_striding_rq_possible(mdev, params))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ,
!slow_pci_heuristic(mdev));
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
/* HW LRO */
/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
params->lro_en = hw_lro_heuristic(link_speed, pci_bw);
params->lro_en = !slow_pci_heuristic(mdev);
params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
......@@ -4023,7 +4164,6 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
mlx5e_set_tx_cq_mode_params(params, cq_period_mode);
/* TX inline */
params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
/* RSS */
......
......@@ -884,7 +884,6 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
params->num_tc = 1;
params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
......
......@@ -333,9 +333,8 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
len = ALIGN(headlen_pg, sizeof(long));
dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len,
DMA_FROM_DEVICE);
skb_copy_to_linear_data_offset(skb, 0,
page_address(dma_info->page) + offset,
len);
skb_copy_to_linear_data(skb, page_address(dma_info->page) + offset, len);
if (unlikely(offset + headlen > PAGE_SIZE)) {
dma_info++;
headlen_pg = len;
......@@ -870,10 +869,8 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
data = va + rx_headroom;
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
dma_sync_single_range_for_cpu(rq->pdev,
di->addr + wi->offset,
0, frag_size,
DMA_FROM_DEVICE);
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
prefetch(data);
wi->offset += frag_size;
......
......@@ -60,6 +60,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
......@@ -153,6 +155,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
s->tx_cqe_err += sq_stats->cqe_err;
s->tx_recover += sq_stats->recover;
s->tx_xmit_more += sq_stats->xmit_more;
s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
s->tx_csum_none += sq_stats->csum_none;
......@@ -1103,6 +1107,8 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
};
static const struct counter_desc ch_stats_desc[] = {
......
......@@ -78,6 +78,8 @@ struct mlx5e_sw_stats {
u64 tx_queue_wake;
u64 tx_queue_dropped;
u64 tx_xmit_more;
u64 tx_cqe_err;
u64 tx_recover;
u64 rx_wqe_err;
u64 rx_mpwqe_filler;
u64 rx_buff_alloc_err;
......@@ -197,6 +199,8 @@ struct mlx5e_sq_stats {
u64 stopped;
u64 wake;
u64 dropped;
u64 cqe_err;
u64 recover;
};
struct mlx5e_ch_stats {
......
......@@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
return mlx5e_sq_xmit(sq, skb, wqe, pi);
}
static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
struct mlx5_err_cqe *err_cqe)
{
u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
netdev_err(sq->channel->netdev,
"Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
err_cqe->vendor_err_synd);
mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_txqsq *sq;
......@@ -456,6 +468,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
&sq->state)) {
mlx5e_dump_error_cqe(sq,
(struct mlx5_err_cqe *)cqe);
queue_work(cq->channel->priv->wq,
&sq->recover.recover_work);
}
sq->stats.cqe_err++;
}
do {
struct mlx5e_tx_wqe_info *wi;
struct sk_buff *skb;
......@@ -509,7 +532,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
if (netif_tx_queue_stopped(sq->txq) &&
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) {
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
MLX5E_SQ_STOP_ROOM) &&
!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netif_tx_wake_queue(sq->txq);
sq->stats.wake++;
}
......
......@@ -56,7 +56,9 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
/* RQ size in ipoib by default is 512 */
params->log_rq_size = is_kdump_kernel() ?
......
......@@ -50,6 +50,11 @@ extern uint mlx5_core_debug_mask;
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_dbg_once(__dev, format, ...) \
dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
__func__, __LINE__, current->pid, \
##__VA_ARGS__)
#define mlx5_core_dbg_mask(__dev, mask, format, ...) \
do { \
if ((mask) & mlx5_core_debug_mask) \
......
......@@ -157,6 +157,31 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
}
EXPORT_SYMBOL(mlx5_core_query_sq);
int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state)
{
void *out;
void *sqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(query_sq_out);
out = kvzalloc(inlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
err = mlx5_core_query_sq(dev, sqn, out);
if (err)
goto out;
sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
*state = MLX5_GET(sqc, sqc, state);
out:
kvfree(out);
return err;
}
EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn)
{
......
......@@ -193,6 +193,12 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
struct mlx5_core_cq *cq, u16 cq_period,
u16 cq_max_count);
static inline void mlx5_dump_err_cqe(struct mlx5_core_dev *dev,
struct mlx5_err_cqe *err_cqe)
{
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe,
sizeof(*err_cqe), false);
}
int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
......
......@@ -47,6 +47,7 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state);
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment