Commit 8bde261e authored by David S. Miller

Merge tag 'mlx5-updates-2018-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2018-03-30

This series contains updates to mlx5 core and mlx5e netdev drivers.
The main highlight of this series is the set of RX optimizations for the Striding RQ path,
introduced by Tariq.

The first four patches are trivial misc cleanups:
 - Spelling mistake fix
 - Dead code removal
 - Warning messages

RX optimizations for striding RQ:

1) RX refactoring, cleanups and micro optimizations:
   - MTU calculation simplifications that obsolete some WQEs-to-packets
     translation functions and help delete ~60 LOC (see the sketch below).
   - Do not busy-wait a pending UMR completion.
   - Post the new values of the UMR WQE inline, instead of using a data pointer.
   - Use pre-initialized structures to save calculations in the datapath.
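
    A minimal, self-contained sketch of the simplified MTU bookkeeping
    referenced above. The struct below is a userspace stand-in for the
    driver's mlx5e_params; only the hard_mtu value (ETH_HLEN + VLAN_HLEN +
    ETH_FCS_LEN) is taken from the patch, everything else is illustrative:

        #include <stdio.h>

        /* Stand-in for the params-based MTU macros: hard_mtu is the fixed
         * per-protocol overhead (Ethernet: 14 + 4 + 4 bytes).
         */
        struct params { int hard_mtu; unsigned int sw_mtu; };

        #define SW2HW_MTU(p, swmtu) ((swmtu) + ((p)->hard_mtu))
        #define HW2SW_MTU(p, hwmtu) ((hwmtu) - ((p)->hard_mtu))

        int main(void)
        {
                struct params p = { .hard_mtu = 14 + 4 + 4, .sw_mtu = 1500 };
                unsigned int hw = SW2HW_MTU(&p, p.sw_mtu);

                printf("sw_mtu %u -> hw_mtu %u -> sw_mtu %u\n",
                       p.sw_mtu, hw, HW2SW_MTU(&p, hw));
                return 0;
        }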

2) Use a linear SKB in Striding RQ ("build_skb"). Using a linear SKB has many advantages:
    - Saves a memcpy of the headers.
    - No page-boundary checks in datapath.
    - No filler CQEs.
    - Significantly smaller CQ.
    - SKB data resides contiguously in the linear part, rather than being
      split into a small linear part and a large fragment.
      This saves datapath cycles in the driver and improves utilization
      of SKB fragments in GRO.
    - The fragments of a resulting GRO SKB follow the IP forwarding
      assumption of equal-size fragments.

    Implementation details:
    HW writes the packets to the beginning of a stride, i.e. it does not
    leave headroom. To overcome this we make sure we can extend backwards
    and use the last bytes of stride i-1. Extra care is needed for stride 0,
    as it has no preceding stride. We make sure headroom bytes are available
    by shifting the buffer pointer passed to HW by headroom bytes.
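
    A standalone sketch of the resulting layout, assuming a hypothetical 2KB
    stride and a 64-byte headroom (both constants are illustrative, not read
    from the driver); it prints where each packet's data starts once the
    buffer pointer handed to HW is shifted by the headroom:

        #include <stdio.h>

        #define STRIDE_SZ 2048  /* hypothetical stride size */
        #define HEADROOM    64  /* hypothetical headroom reserved before data */

        int main(void)
        {
                /* HW writes packet i at offset i * STRIDE_SZ of the shifted
                 * buffer, so packet i's headroom occupies the last HEADROOM
                 * bytes of stride i - 1; stride 0 uses the bytes freed by
                 * the initial pointer shift.
                 */
                for (int i = 0; i < 4; i++) {
                        unsigned int data_off = HEADROOM + i * STRIDE_SZ;

                        printf("stride %d: headroom [%u..%u), data at %u\n",
                               i, data_off - HEADROOM, data_off, data_off);
                }
                return 0;
        }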

    This configuration now becomes the default whenever the HW is capable.
    Of course, this implies turning LRO off.

    Performance testing:
    ConnectX-5, single core, single RX ring, default MTU.

    UDP packet rate, early drop in TC layer:

    --------------------------------------------
    | pkt size | before    | after     | ratio |
    --------------------------------------------
    | 1500byte | 4.65 Mpps | 5.96 Mpps | 1.28x |
    |  500byte | 5.23 Mpps | 5.97 Mpps | 1.14x |
    |   64byte | 5.94 Mpps | 5.96 Mpps | 1.00x |
    --------------------------------------------

    TCP streams: ~20% gain

3) Support XDP over Striding RQ:
    Now that linear SKB is supported over Striding RQ,
    we can support XDP by setting stride size to PAGE_SIZE
    and headroom to XDP_PACKET_HEADROOM.

    Striding RQ is capable of a higher packet-rate than
    conventional RQ.
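
    A rough sketch of the arithmetic behind this configuration, using a
    hypothetical 4KB page; XDP_PACKET_HEADROOM is 256 bytes in mainline, and
    the skb_shared_info size below is only an illustrative placeholder:

        #include <stdio.h>

        #define PAGE_SZ        4096 /* assumed page size */
        #define XDP_HEADROOM    256 /* XDP_PACKET_HEADROOM in mainline */
        #define SHINFO_SZ       320 /* illustrative skb_shared_info size */

        int main(void)
        {
                /* One packet per PAGE_SIZE stride: headroom first, then the
                 * frame, then the tailroom build_skb() needs for shared info.
                 */
                int max_frame = PAGE_SZ - XDP_HEADROOM - SHINFO_SZ;

                printf("stride %d, headroom %d, max linear frame %d bytes\n",
                       PAGE_SZ, XDP_HEADROOM, max_frame);
                return 0;
        }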

    Performance testing:
    ConnectX-5, 24 rings, default MTU.
    CQE compression ON (to reduce completion BW over PCIe).

    XDP_DROP packet rate:
    --------------------------------------------------
    | pkt size | XDP rate   | 100GbE linerate | pct% |
    --------------------------------------------------
    |   64byte | 126.2 Mpps |      148.0 Mpps |  85% |
    |  128byte |  80.0 Mpps |       84.8 Mpps |  94% |
    |  256byte |  42.7 Mpps |       42.7 Mpps | 100% |
    |  512byte |  23.4 Mpps |       23.4 Mpps | 100% |
    --------------------------------------------------
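
    For reference, the 100GbE linerate column can be approximated from the
    standard per-frame Ethernet overhead (8B preamble/SFD + 12B inter-frame
    gap); this quick check is not the exact methodology used for the table,
    so the numbers need not match it precisely:

        #include <stdio.h>

        int main(void)
        {
                const double link_bps = 100e9;
                const int overhead = 8 + 12;  /* preamble/SFD + IFG, bytes */
                const int sizes[] = { 64, 128, 256, 512 };

                for (int i = 0; i < 4; i++) {
                        double mpps = link_bps / ((sizes[i] + overhead) * 8) / 1e6;

                        printf("%4d byte frames: %.1f Mpps theoretical max\n",
                               sizes[i], mpps);
                }
                return 0;
        }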

4) Remove mlx5 page_ref bulking in Striding RQ and use page_ref_inc only when needed.
   Without this bulking, we have:
    - no atomic ops on WQE allocation or free
    - one atomic op per SKB
    - In the default MTU configuration (1500, stride size is 2K),
      the non-bulking method executes 2 atomic ops per page, as before
      (see the counting sketch below).
    - For larger MTUs with a stride size of 4K, the non-bulking method
      executes only a single op.
    - For XDP (stride size of 4K, no SKBs), the non-bulking method has no
      atomic ops per packet at all.
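
    A small sketch of the op counting above, assuming a 4KB page (the helper
    is hypothetical, not driver code): one page_ref-style atomic per SKB built
    on a page, and none for XDP_DROP since no SKB is built:

        #include <stdio.h>

        #define PAGE_SZ 4096

        /* Atomics the non-bulking scheme adds per page, beyond the usual
         * page allocate/free pair.
         */
        static int atomics_per_page(int stride_sz, int builds_skbs)
        {
                int pkts_per_page = PAGE_SZ / stride_sz;

                return builds_skbs ? pkts_per_page : 0;
        }

        int main(void)
        {
                printf("MTU 1500, 2K strides, SKBs:  %d ops/page\n",
                       atomics_per_page(2048, 1));
                printf("large MTU, 4K strides, SKBs: %d ops/page\n",
                       atomics_per_page(4096, 1));
                printf("XDP, 4K strides, no SKBs:    %d ops/page\n",
                       atomics_per_page(4096, 0));
                return 0;
        }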

    Performance testing:
    ConnectX-5, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz.

    Single core packet rate (64 bytes).

    Early drop in TC: no degradation.

    XDP_DROP:
    before: 14,270,188 pps
    after:  20,503,603 pps, 43% improvement.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e2e80c02 ab966d7e
@@ -57,24 +57,12 @@
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
-#define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu))
-#define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu))
+#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
+#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
#define MLX5E_MAX_DSCP          64
#define MLX5E_MAX_NUM_TC        8
-#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE        0x6
-#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE        0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE        0xd
-#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE        0x1
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE        0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE        0xd
-#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW    0x2
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW    0x3
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW    0x6
#define MLX5_RX_HEADROOM NET_SKB_PAD
#define MLX5_SKB_FRAG_SZ(len)   (SKB_DATA_ALIGN(len) +  \
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -95,11 +83,29 @@
#define MLX5_MPWRQ_PAGES_PER_WQE        BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_MTTS(wqes) \
-        (wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
+#define MLX5E_REQUIRED_WQE_MTTS         (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_LOG_ALIGNED_MPWQE_PPW     (ilog2(MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_REQUIRED_MTTS(wqes)       (wqes * MLX5E_REQUIRED_WQE_MTTS)
+#define MLX5E_MAX_RQ_NUM_MTTS   \
+        ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
+                (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
+        (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
+         (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE        0x6
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE        0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE        0xd
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE        0x1
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE        0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
+                                               MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW    0x2
-#define MLX5_UMR_ALIGN                          (2048)
#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD       (256)
#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ         (64 * 1024)
@@ -124,8 +130,13 @@
#define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
#define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
-#define MLX5E_ICOSQ_MAX_WQEBBS \
-        (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+#define MLX5E_UMR_WQE_INLINE_SZ \
+        (sizeof(struct mlx5e_umr_wqe) + \
+         ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
+               MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5E_UMR_WQEBBS \
+        (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_DS_COUNT \
@@ -155,26 +166,6 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
        }
}
-static inline int mlx5_min_log_rq_size(int wq_type)
-{
-        switch (wq_type) {
-        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-                return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
-        default:
-                return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
-        }
-}
-
-static inline int mlx5_max_log_rq_size(int wq_type)
-{
-        switch (wq_type) {
-        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-                return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW;
-        default:
-                return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
-        }
-}
-
static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
        return is_kdump_kernel() ?
@@ -197,7 +188,7 @@ struct mlx5e_umr_wqe {
        struct mlx5_wqe_ctrl_seg       ctrl;
        struct mlx5_wqe_umr_ctrl_seg   uctrl;
        struct mlx5_mkey_seg           mkc;
-        struct mlx5_wqe_data_seg       data;
+        struct mlx5_mtt                inline_mtts[0];
};

extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
@@ -233,7 +224,7 @@ enum mlx5e_priv_flag {
struct mlx5e_params {
        u8  log_sq_size;
        u8  rq_wq_type;
-        u8  log_rq_size;
+        u8  log_rq_mtu_frames;
        u16 num_channels;
        u8  num_tc;
        bool rx_cqe_compress_def;
@@ -251,6 +242,8 @@ struct mlx5e_params {
        u32 lro_timeout;
        u32 pflags;
        struct bpf_prog *xdp_prog;
+        unsigned int sw_mtu;
+        int hard_mtu;
};

#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -433,7 +426,6 @@ struct mlx5e_icosq {
        void __iomem              *uar_map;
        u32                        sqn;
        u16                        edge;
-        __be32                     mkey_be;
        unsigned long              state;

        /* control path */
@@ -458,16 +450,13 @@ struct mlx5e_wqe_frag_info {
};

struct mlx5e_umr_dma_info {
-        __be64                 *mtt;
-        dma_addr_t              mtt_addr;
        struct mlx5e_dma_info   dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
-        struct mlx5e_umr_wqe    wqe;
};

struct mlx5e_mpw_info {
        struct mlx5e_umr_dma_info umr;
        u16 consumed_strides;
-        u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
+        DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
};

/* a single cache unit is capable to serve one napi call (for non-striding rq)
@@ -484,9 +473,16 @@ struct mlx5e_page_cache {
struct mlx5e_rq;
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+                               u16 cqe_bcnt, u32 head_offset, u32 page_idx);
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);

+enum mlx5e_rq_flag {
+        MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+};
+
struct mlx5e_rq {
        /* data path */
        struct mlx5_wq_ll      wq;
@@ -497,12 +493,12 @@ struct mlx5e_rq {
                        u32 frag_sz;    /* max possible skb frag_sz */
                        union {
                                bool page_reuse;
-                                bool xdp_xmit;
                        };
                } wqe;
                struct {
+                        struct mlx5e_umr_wqe   umr_wqe;
                        struct mlx5e_mpw_info *info;
-                        void                  *mtt_no_align;
+                        mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
                        u16                    num_strides;
                        u8                     log_stride_sz;
                        bool                   umr_in_progress;
@@ -534,7 +530,9 @@
        /* XDP */
        struct bpf_prog       *xdp_prog;
+        unsigned int           hw_mtu;
        struct mlx5e_xdpsq     xdpsq;
+        DECLARE_BITMAP(flags, 8);

        /* control */
        struct mlx5_wq_ctrl    wq_ctrl;
@@ -767,7 +765,6 @@ struct mlx5e_priv {
        struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
        u32                        tx_rates[MLX5E_MAX_NUM_SQS];
-        int                        hard_mtu;

        struct mlx5e_flow_steering fs;
        struct mlx5e_vxlan_db      vxlan;
@@ -846,11 +843,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
-u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
-                                   struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
-                                   struct mlx5e_params *params);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+                                u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+                                   u16 cqe_bcnt, u32 head_offset, u32 page_idx);

void mlx5e_update_stats(struct mlx5e_priv *priv);
@@ -981,11 +979,6 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
        mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
}

-static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
-{
-        return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
-}
-
extern const struct ethtool_ops mlx5e_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
@@ -1111,7 +1104,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
                            struct mlx5e_params *params,
-                            u16 max_channels);
+                            u16 max_channels, u16 mtu);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
#endif /* __MLX5_EN_H__ */
@@ -220,60 +220,12 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
        mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
}

-static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
-                                    int num_wqe)
-{
-        int packets_per_wqe;
-        int stride_size;
-        int num_strides;
-        int wqe_size;
-
-        if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-                return num_wqe;
-
-        stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
-        num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
-        wqe_size = stride_size * num_strides;
-
-        packets_per_wqe = wqe_size /
-                          ALIGN(ETH_DATA_LEN, stride_size);
-        return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
-}
-
-static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
-                                    int num_packets)
-{
-        int packets_per_wqe;
-        int stride_size;
-        int num_strides;
-        int wqe_size;
-        int num_wqes;
-
-        if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-                return num_packets;
-
-        stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
-        num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
-        wqe_size = stride_size * num_strides;
-
-        num_packets = (1 << order_base_2(num_packets));
-
-        packets_per_wqe = wqe_size /
-                          ALIGN(ETH_DATA_LEN, stride_size);
-        num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
-        return 1 << (order_base_2(num_wqes));
-}
-
void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
                                 struct ethtool_ringparam *param)
{
-        int rq_wq_type = priv->channels.params.rq_wq_type;
-
-        param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-                                                         1 << mlx5_max_log_rq_size(rq_wq_type));
+        param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
        param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-        param->rx_pending     = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-                                                         1 << priv->channels.params.log_rq_size);
+        param->rx_pending     = 1 << priv->channels.params.log_rq_mtu_frames;
        param->tx_pending     = 1 << priv->channels.params.log_sq_size;
}

@@ -288,13 +240,9 @@ static void mlx5e_get_ringparam(struct net_device *dev,
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
                                struct ethtool_ringparam *param)
{
-        int rq_wq_type = priv->channels.params.rq_wq_type;
        struct mlx5e_channels new_channels = {};
-        u32 rx_pending_wqes;
-        u32 min_rq_size;
        u8 log_rq_size;
        u8 log_sq_size;
-        u32 num_mtts;
        int err = 0;

        if (param->rx_jumbo_pending) {
@@ -308,23 +256,10 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
                return -EINVAL;
        }

-        min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-                                               1 << mlx5_min_log_rq_size(rq_wq_type));
-        rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
-                                                   param->rx_pending);
-
-        if (param->rx_pending < min_rq_size) {
+        if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
                netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n",
                            __func__, param->rx_pending,
-                            min_rq_size);
-                return -EINVAL;
-        }
-
-        num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
-        if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
-            !MLX5E_VALID_NUM_MTTS(num_mtts)) {
-                netdev_info(priv->netdev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
-                            __func__, param->rx_pending);
+                            1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
                return -EINVAL;
        }

@@ -335,17 +270,17 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
                return -EINVAL;
        }

-        log_rq_size = order_base_2(rx_pending_wqes);
+        log_rq_size = order_base_2(param->rx_pending);
        log_sq_size = order_base_2(param->tx_pending);

-        if (log_rq_size == priv->channels.params.log_rq_size &&
+        if (log_rq_size == priv->channels.params.log_rq_mtu_frames &&
            log_sq_size == priv->channels.params.log_sq_size)
                return 0;

        mutex_lock(&priv->state_lock);

        new_channels.params = priv->channels.params;
-        new_channels.params.log_rq_size = log_rq_size;
+        new_channels.params.log_rq_mtu_frames = log_rq_size;
        new_channels.params.log_sq_size = log_sq_size;

        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
...
@@ -877,9 +877,10 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
                MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
                MLX5_CQ_PERIOD_MODE_START_FROM_EQE;

+        params->hard_mtu    = MLX5E_ETH_HARD_MTU;
        params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
        params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
-        params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+        params->log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;

        params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
        mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
@@ -926,8 +927,6 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
        priv->channels.params.num_channels = profile->max_nch(mdev);

-        priv->hard_mtu = MLX5E_ETH_HARD_MTU;
-
        mlx5e_build_rep_params(mdev, &priv->channels.params);
        mlx5e_build_rep_netdev(netdev);
...
@@ -245,7 +245,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
        force_state = MLX5_GET(teardown_hca_out, out, force_state);
        if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
-                mlx5_core_err(dev, "teardown with force mode failed\n");
+                mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
                return -EIO;
        }
...
@@ -61,11 +61,12 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
        mlx5e_init_rq_type_params(mdev, params);

        /* RQ size in ipoib by default is 512 */
-        params->log_rq_size = is_kdump_kernel() ?
+        params->log_rq_mtu_frames = is_kdump_kernel() ?
                MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
                MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;

        params->lro_en = false;
+        params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
}

/* Called directly after IPoIB netdevice was created to initialize SW structs */
@@ -81,10 +82,10 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
        priv->netdev      = netdev;
        priv->profile     = profile;
        priv->ppriv       = ppriv;
-        priv->hard_mtu    = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
        mutex_init(&priv->state_lock);

-        mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
+        mlx5e_build_nic_params(mdev, &priv->channels.params,
+                               profile->max_nch(mdev), netdev->mtu);
        mlx5i_build_nic_params(mdev, &priv->channels.params);

        mlx5e_timestamp_init(priv);
@@ -368,25 +369,27 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
{
        struct mlx5e_priv *priv = mlx5i_epriv(netdev);
        struct mlx5e_channels new_channels = {};
-        int curr_mtu;
+        struct mlx5e_params *params;
        int err = 0;

        mutex_lock(&priv->state_lock);

-        curr_mtu    = netdev->mtu;
-        netdev->mtu = new_mtu;
+        params = &priv->channels.params;

-        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+                params->sw_mtu = new_mtu;
+                netdev->mtu = params->sw_mtu;
                goto out;
+        }

-        new_channels.params = priv->channels.params;
+        new_channels.params = *params;
+        new_channels.params.sw_mtu = new_mtu;

        err = mlx5e_open_channels(priv, &new_channels);
-        if (err) {
-                netdev->mtu = curr_mtu;
+        if (err)
                goto out;
-        }

        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+        netdev->mtu = new_channels.params.sw_mtu;

out:
        mutex_unlock(&priv->state_lock);
@@ -540,7 +543,7 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
        err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
        if (err)
-                mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n",
+                mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
                              ipriv->qp.qpn, gid->raw);

        return err;
...
@@ -290,7 +290,7 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
        netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;

        /* Use dummy rqs */
-        priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+        priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
}

/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
...
@@ -354,27 +354,6 @@ int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out)
-{
-        u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0};
-        void *srqc;
-        void *xrc_srqc;
-        int err;
-
-        MLX5_SET(query_xrc_srq_in, in, opcode,   MLX5_CMD_OP_QUERY_XRC_SRQ);
-        MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn);
-        err = mlx5_cmd_exec(dev, in, sizeof(in), out,
-                            MLX5_ST_SZ_BYTES(query_xrc_srq_out));
-        if (!err) {
-                xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out,
-                                        xrc_srq_context_entry);
-                srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
-                memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc));
-        }
-
-        return err;
-}
-
int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
{
        u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
...
@@ -782,6 +782,9 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
        return (u64)lo | ((u64)hi << 32);
}

+#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE (9)
+#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE   (6)
+
struct mpwrq_cqe_bc {
        __be16  filler_consumed_strides;
        __be16  byte_cnt;
...
@@ -1038,7 +1038,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_398[0x3];
        u8         log_max_tis_per_sq[0x5];

-        u8         reserved_at_3a0[0x3];
+        u8         ext_stride_num_range[0x1];
+        u8         reserved_at_3a1[0x2];
        u8         log_max_stride_sz_rq[0x5];
        u8         reserved_at_3a8[0x3];
        u8         log_min_stride_sz_rq[0x5];
@@ -1205,9 +1206,9 @@ struct mlx5_ifc_wq_bits {
        u8         log_hairpin_num_packets[0x5];
        u8         reserved_at_128[0x3];
        u8         log_hairpin_data_sz[0x5];
-        u8         reserved_at_130[0x5];

-        u8         log_wqe_num_of_strides[0x3];
+        u8         reserved_at_130[0x4];
+        u8         log_wqe_num_of_strides[0x4];
        u8         two_byte_shift_en[0x1];
        u8         reserved_at_139[0x4];
        u8         log_wqe_stride_size[0x3];
...
@@ -67,7 +67,6 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
                          u32 *rmpn);
int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);

int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
...