Commit 61147f39 authored by Bodong Wang's avatar Bodong Wang Committed by Jason Gunthorpe

IB/mlx5: Packet pacing enhancement for RAW QP

Enable the RAW QP to configure burst control via modify_qp. By
using burst control together with rate limiting, users can achieve the
best performance and accuracy. The burst control information is passed
by the user through udata.

This patch also reports the burst control capability for mlx5-based
hardware; burst control is only marked as supported when both
packet_pacing_burst_bound and packet_pacing_typical_size are
supported.
Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 05d3ac97
...@@ -989,6 +989,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, ...@@ -989,6 +989,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_QOS(mdev, packet_pacing_min_rate); MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
resp.packet_pacing_caps.supported_qpts |= resp.packet_pacing_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET; 1 << IB_QPT_RAW_PACKET;
if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
resp.packet_pacing_caps.cap_flags |=
MLX5_IB_PP_SUPPORT_BURST;
} }
resp.response_length += sizeof(resp.packet_pacing_caps); resp.response_length += sizeof(resp.packet_pacing_caps);
} }
......
...@@ -403,7 +403,7 @@ struct mlx5_ib_qp { ...@@ -403,7 +403,7 @@ struct mlx5_ib_qp {
struct list_head qps_list; struct list_head qps_list;
struct list_head cq_recv_list; struct list_head cq_recv_list;
struct list_head cq_send_list; struct list_head cq_send_list;
u32 rate_limit; struct mlx5_rate_limit rl;
u32 underlay_qpn; u32 underlay_qpn;
bool tunnel_offload_en; bool tunnel_offload_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */ /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
......
...@@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param { ...@@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param {
u16 operation; u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */ u32 set_mask; /* raw_qp_set_mask_map */
u32 rate_limit;
struct mlx5_rate_limit rl;
u8 rq_q_ctr_id; u8 rq_q_ctr_id;
}; };
...@@ -2774,8 +2776,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, ...@@ -2774,8 +2776,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
const struct mlx5_modify_raw_qp_param *raw_qp_param) const struct mlx5_modify_raw_qp_param *raw_qp_param)
{ {
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
u32 old_rate = ibqp->rate_limit; struct mlx5_rate_limit old_rl = ibqp->rl;
u32 new_rate = old_rate; struct mlx5_rate_limit new_rl = old_rl;
bool new_rate_added = false;
u16 rl_index = 0; u16 rl_index = 0;
void *in; void *in;
void *sqc; void *sqc;
...@@ -2797,39 +2800,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, ...@@ -2797,39 +2800,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
__func__); __func__);
else else
new_rate = raw_qp_param->rate_limit; new_rl = raw_qp_param->rl;
} }
if (old_rate != new_rate) { if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
if (new_rate) { if (new_rl.rate) {
err = mlx5_rl_add_rate(dev, new_rate, &rl_index); err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
if (err) { if (err) {
pr_err("Failed configuring rate %u: %d\n", pr_err("Failed configuring rate limit(err %d): \
new_rate, err); rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
err, new_rl.rate, new_rl.max_burst_sz,
new_rl.typical_pkt_sz);
goto out; goto out;
} }
new_rate_added = true;
} }
MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
/* index 0 means no limit */
MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
} }
err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
if (err) { if (err) {
/* Remove new rate from table if failed */ /* Remove new rate from table if failed */
if (new_rate && if (new_rate_added)
old_rate != new_rate) mlx5_rl_remove_rate(dev, &new_rl);
mlx5_rl_remove_rate(dev, new_rate);
goto out; goto out;
} }
/* Only remove the old rate after new rate was set */ /* Only remove the old rate after new rate was set */
if ((old_rate && if ((old_rl.rate &&
(old_rate != new_rate)) || !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
(new_state != MLX5_SQC_STATE_RDY)) (new_state != MLX5_SQC_STATE_RDY))
mlx5_rl_remove_rate(dev, old_rate); mlx5_rl_remove_rate(dev, &old_rl);
ibqp->rate_limit = new_rate; ibqp->rl = new_rl;
sq->state = new_state; sq->state = new_state;
out: out:
...@@ -2906,7 +2913,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ...@@ -2906,7 +2913,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask, const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state) enum ib_qp_state cur_state, enum ib_qp_state new_state,
const struct mlx5_ib_modify_qp *ucmd)
{ {
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
[MLX5_QP_STATE_RST] = { [MLX5_QP_STATE_RST] = {
...@@ -3144,7 +3152,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, ...@@ -3144,7 +3152,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
} }
if (attr_mask & IB_QP_RATE_LIMIT) { if (attr_mask & IB_QP_RATE_LIMIT) {
raw_qp_param.rate_limit = attr->rate_limit; raw_qp_param.rl.rate = attr->rate_limit;
if (ucmd->burst_info.max_burst_sz) {
if (attr->rate_limit &&
MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
raw_qp_param.rl.max_burst_sz =
ucmd->burst_info.max_burst_sz;
} else {
err = -EINVAL;
goto out;
}
}
if (ucmd->burst_info.typical_pkt_sz) {
if (attr->rate_limit &&
MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
raw_qp_param.rl.typical_pkt_sz =
ucmd->burst_info.typical_pkt_sz;
} else {
err = -EINVAL;
goto out;
}
}
raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
} }
...@@ -3332,8 +3363,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -3332,8 +3363,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{ {
struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_modify_qp ucmd = {};
enum ib_qp_type qp_type; enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state; enum ib_qp_state cur_state, new_state;
size_t required_cmd_sz;
int err = -EINVAL; int err = -EINVAL;
int port; int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
...@@ -3341,6 +3374,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -3341,6 +3374,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (ibqp->rwq_ind_tbl) if (ibqp->rwq_ind_tbl)
return -ENOSYS; return -ENOSYS;
if (udata && udata->inlen) {
required_cmd_sz = offsetof(typeof(ucmd), reserved) +
sizeof(ucmd.reserved);
if (udata->inlen < required_cmd_sz)
return -EINVAL;
if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
udata->inlen - sizeof(ucmd)))
return -EOPNOTSUPP;
if (ib_copy_from_udata(&ucmd, udata,
min(udata->inlen, sizeof(ucmd))))
return -EFAULT;
if (ucmd.comp_mask ||
memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) ||
memchr_inv(&ucmd.burst_info.reserved, 0,
sizeof(ucmd.burst_info.reserved)))
return -EOPNOTSUPP;
}
if (unlikely(ibqp->qp_type == IB_QPT_GSI)) if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
...@@ -3421,7 +3476,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ...@@ -3421,7 +3476,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out; goto out;
} }
err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
new_state, &ucmd);
out: out:
mutex_unlock(&qp->mutex); mutex_unlock(&qp->mutex);
......
...@@ -163,6 +163,10 @@ struct mlx5_ib_cqe_comp_caps { ...@@ -163,6 +163,10 @@ struct mlx5_ib_cqe_comp_caps {
__u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */ __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
}; };
/* Capability bits reported to userspace in mlx5_packet_pacing_caps.cap_flags. */
enum mlx5_ib_packet_pacing_cap_flags {
MLX5_IB_PP_SUPPORT_BURST = 1 << 0, /* set only when both packet_pacing_burst_bound and packet_pacing_typical_size HW caps are present */
};
struct mlx5_packet_pacing_caps { struct mlx5_packet_pacing_caps {
__u32 qp_rate_limit_min; __u32 qp_rate_limit_min;
__u32 qp_rate_limit_max; /* In kpbs */ __u32 qp_rate_limit_max; /* In kpbs */
...@@ -172,7 +176,8 @@ struct mlx5_packet_pacing_caps { ...@@ -172,7 +176,8 @@ struct mlx5_packet_pacing_caps {
* supported_qpts |= 1 << IB_QPT_RAW_PACKET * supported_qpts |= 1 << IB_QPT_RAW_PACKET
*/ */
__u32 supported_qpts; __u32 supported_qpts;
__u32 reserved; __u8 cap_flags; /* enum mlx5_ib_packet_pacing_cap_flags */
__u8 reserved[3];
}; };
enum mlx5_ib_mpw_caps { enum mlx5_ib_mpw_caps {
...@@ -362,6 +367,18 @@ struct mlx5_ib_create_ah_resp { ...@@ -362,6 +367,18 @@ struct mlx5_ib_create_ah_resp {
__u8 reserved[6]; __u8 reserved[6];
}; };
/*
 * Burst control parameters for a rate-limited RAW packet QP, supplied by
 * userspace inside struct mlx5_ib_modify_qp. Each field may only be
 * non-zero when a rate limit is being set and the corresponding HW
 * capability (packet_pacing_burst_bound / packet_pacing_typical_size)
 * is supported; otherwise modify_qp fails with -EINVAL.
 */
struct mlx5_ib_burst_info {
__u32 max_burst_sz; /* maximum burst size; units not visible here — presumably bytes, per HW spec */
__u16 typical_pkt_sz; /* typical packet size hint for pacing; presumably bytes */
__u16 reserved; /* must be zero (enforced in mlx5_ib_modify_qp) */
};
/*
 * Driver-specific input for modify_qp, copied from udata in
 * mlx5_ib_modify_qp(). Trailing bytes beyond the kernel's known size
 * must be zeroed by userspace (checked with ib_is_udata_cleared).
 */
struct mlx5_ib_modify_qp {
__u32 comp_mask; /* must be zero — no extension bits defined yet */
struct mlx5_ib_burst_info burst_info; /* burst control for IB_QP_RATE_LIMIT */
__u32 reserved; /* must be zero; also marks the minimum required udata size */
};
struct mlx5_ib_modify_qp_resp { struct mlx5_ib_modify_qp_resp {
__u32 response_length; __u32 response_length;
__u32 dctn; __u32 dctn;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment