Commit 4e62ccd9 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlx4-next'

Amir Vadai says:

====================
net/mlx4_en: Optimizations to TX flow

This patchset contains optimizations to TX flow in mlx4_en driver. It also introduce
setting/getting tx copybreak, to enable controlling inline threshold dynamically.

TX flow optimizations was authored and posted to the mailing list by Eric
Dumazet [1] as a single patch. I splitted this patch to smaller patches,
Reviewed it and tested.
Changed from original patch:
- s/iowrite32be/iowrite32/, since ring->doorbell_qpn is stored as be32

The tx copybreak patch was also suggested by Eric Dumazet, and was edited and
reviewed by me. User space patch will be sent after kernel code is ready.

I am sending this patchset now since the merge window is near and don't want to
miss it.

More work need to do:
- Disable BF when xmit_more is in use
- Make TSO use xmit_more too. Maybe by splitting small TSO packets in the
  driver itself, to avoid extra cpu/memory costs of GSO before the driver
- Fix mlx4_en_xmit buggy handling of queue full in the middle of a burst
  partially posted to send queue using xmit_more

Eric, I edited the patches to have you as the Author and the first
signed-off-by. I hope it is ok with you (I wasn't sure if it is ok to sign by
you), anyway all the credit to those changes should go to you.

Patchset was tested and applied over commit 1e203c1a "(net: sched:
suspicious RCU usage in qdisc_watchdog")

[1] - https://patchwork.ozlabs.org/patch/394256/
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents f2600cf0 1556b874
......@@ -1267,6 +1267,48 @@ static u32 mlx4_en_get_priv_flags(struct net_device *dev)
return priv->pflags;
}
static int mlx4_en_get_tunable(struct net_device *dev,
const struct ethtool_tunable *tuna,
void *data)
{
const struct mlx4_en_priv *priv = netdev_priv(dev);
int ret = 0;
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
*(u32 *)data = priv->prof->inline_thold;
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static int mlx4_en_set_tunable(struct net_device *dev,
const struct ethtool_tunable *tuna,
const void *data)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
int val, ret = 0;
switch (tuna->id) {
case ETHTOOL_TX_COPYBREAK:
val = *(u32 *)data;
if (val < MIN_PKT_LEN || val > MAX_INLINE)
ret = -EINVAL;
else
priv->prof->inline_thold = val;
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_drvinfo = mlx4_en_get_drvinfo,
......@@ -1297,6 +1339,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_ts_info = mlx4_en_get_ts_info,
.set_priv_flags = mlx4_en_set_priv_flags,
.get_priv_flags = mlx4_en_get_priv_flags,
.get_tunable = mlx4_en_get_tunable,
.set_tunable = mlx4_en_set_tunable,
};
......
This diff is collapsed.
......@@ -216,13 +216,16 @@ enum cq_type {
struct mlx4_en_tx_info {
struct sk_buff *skb;
u32 nr_txbb;
u32 nr_bytes;
u8 linear;
u8 data_offset;
u8 inl;
u8 ts_requested;
};
dma_addr_t map0_dma;
u32 map0_byte_count;
u32 nr_txbb;
u32 nr_bytes;
u8 linear;
u8 data_offset;
u8 inl;
u8 ts_requested;
u8 nr_maps;
} ____cacheline_aligned_in_smp;
#define MLX4_EN_BIT_DESC_OWN 0x80000000
......@@ -253,41 +256,46 @@ struct mlx4_en_rx_alloc {
};
struct mlx4_en_tx_ring {
/* cache line used and dirtied in tx completion
* (mlx4_en_free_tx_buf())
*/
u32 last_nr_txbb;
u32 cons;
unsigned long wake_queue;
/* cache line used and dirtied in mlx4_en_xmit() */
u32 prod ____cacheline_aligned_in_smp;
unsigned long bytes;
unsigned long packets;
unsigned long tx_csum;
unsigned long tso_packets;
unsigned long xmit_more;
struct mlx4_bf bf;
unsigned long queue_stopped;
/* Following part should be mostly read */
cpumask_t affinity_mask;
struct mlx4_qp qp;
struct mlx4_hwq_resources wqres;
u32 size ; /* number of TXBBs */
u32 size_mask;
u16 stride;
u16 cqn; /* index of port CQ associated with this ring */
u32 prod;
u32 cons;
u32 buf_size;
u32 doorbell_qpn;
void *buf;
u16 poll_cnt;
struct mlx4_en_tx_info *tx_info;
u8 *bounce_buf;
u8 queue_index;
cpumask_t affinity_mask;
u32 last_nr_txbb;
struct mlx4_qp qp;
struct mlx4_qp_context context;
int qpn;
enum mlx4_qp_state qp_state;
struct mlx4_srq dummy;
unsigned long bytes;
unsigned long packets;
unsigned long tx_csum;
unsigned long queue_stopped;
unsigned long wake_queue;
unsigned long tso_packets;
unsigned long xmit_more;
struct mlx4_bf bf;
bool bf_enabled;
bool bf_alloced;
struct netdev_queue *tx_queue;
int hwtstamp_tx_type;
int inline_thold;
};
u32 size; /* number of TXBBs */
u32 size_mask;
u16 stride;
u16 cqn; /* index of port CQ associated with this ring */
u32 buf_size;
__be32 doorbell_qpn;
__be32 mr_key;
void *buf;
struct mlx4_en_tx_info *tx_info;
u8 *bounce_buf;
struct mlx4_qp_context context;
int qpn;
enum mlx4_qp_state qp_state;
u8 queue_index;
bool bf_enabled;
bool bf_alloced;
struct netdev_queue *tx_queue;
int hwtstamp_tx_type;
} ____cacheline_aligned_in_smp;
struct mlx4_en_rx_desc {
/* actual number of entries depends on rx ring stride */
......
......@@ -583,7 +583,7 @@ struct mlx4_uar {
};
struct mlx4_bf {
unsigned long offset;
unsigned int offset;
int buf_size;
struct mlx4_uar *uar;
void __iomem *reg;
......
......@@ -212,6 +212,7 @@ struct ethtool_value {
enum tunable_id {
ETHTOOL_ID_UNSPEC,
ETHTOOL_RX_COPYBREAK,
ETHTOOL_TX_COPYBREAK,
};
enum tunable_type_id {
......
......@@ -1625,6 +1625,7 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
{
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
case ETHTOOL_TX_COPYBREAK:
if (tuna->len != sizeof(u32) ||
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment