Commit 1c726c44 authored by Jason Gunthorpe

Merge HFI1 updates into k.o/for-next

Based on rdma.git for-rc for dependencies.

From Dennis Dalessandro:

====================

Here are some code improvement patches, along with fixes for TID RDMA bugs
that are less serious than the ones we sent for -rc.

====================

* HFI1 updates:
  IB/hfi1: Implement CCA for TID RDMA protocol
  IB/hfi1: Remove WARN_ON when freeing expected receive groups
  IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE
  IB/hfi1: Add a function to read next expected psn from hardware flow
  IB/hfi1: Delay the release of destination mr for TID RDMA WRITE DATA
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parents 061ccb52 747b931f
@@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
         int total_contexts;
         int ret;
         unsigned ngroups;
-        int qos_rmt_count;
+        int rmt_count;
         int user_rmt_reduced;
         u32 n_usr_ctxts;
         u32 send_contexts = chip_send_contexts(dd);
@@ -13294,10 +13294,23 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
                 n_usr_ctxts = rcv_contexts - total_contexts;
         }
 
-        /* each user context requires an entry in the RMT */
-        qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
-        if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
-                user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+        /*
+         * The RMT entries are currently allocated as shown below:
+         * 1. QOS (0 to 128 entries);
+         * 2. FECN (num_kernel_contexts - 1 + num_user_contexts +
+         *    num_vnic_contexts);
+         * 3. VNIC (num_vnic_contexts).
+         * It should be noted that FECN oversubscribes num_vnic_contexts
+         * entries of RMT because both VNIC and PSM could allocate any receive
+         * context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
+         * and PSM FECN must reserve an RMT entry for each possible PSM receive
+         * context.
+         */
+        rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+        if (HFI1_CAP_IS_KSET(TID_RDMA))
+                rmt_count += num_kernel_contexts - 1;
+        if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+                user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
                 dd_dev_err(dd,
                            "RMT size is reducing the number of user receive contexts from %u to %d\n",
                            n_usr_ctxts,
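
Note: the check above trades user receive contexts for RMT entries once the QOS,
FECN, and VNIC demands would overflow the map table. A minimal standalone sketch
of that budget arithmetic, assuming a 256-entry table and made-up context counts
(the real values come from the device data and module parameters):

#include <stdbool.h>
#include <stdio.h>

/* Standalone illustration of the rmt_count budget check; every value
 * below is a made-up stand-in, not read from a real hfi1_devdata.
 */
#define NUM_MAP_ENTRIES 256     /* RSM map table size (assumed) */

int main(void)
{
        unsigned int qos_entries = 128;         /* worst-case QOS share */
        unsigned int num_vnic_contexts = 8;
        unsigned int num_kernel_contexts = 16;
        unsigned int n_usr_ctxts = 120;
        bool tid_rdma_enabled = true;
        unsigned int rmt_count;

        /* FECN oversubscribes the VNIC contexts, hence the factor of 2. */
        rmt_count = qos_entries + num_vnic_contexts * 2;
        if (tid_rdma_enabled)
                rmt_count += num_kernel_contexts - 1;

        if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
                unsigned int user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;

                printf("user contexts reduced from %u to %u\n",
                       n_usr_ctxts, user_rmt_reduced);
                n_usr_ctxts = user_rmt_reduced;
        }
        printf("rmt_count=%u, n_usr_ctxts=%u\n", rmt_count, n_usr_ctxts);
        return 0;
}
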
@@ -14278,35 +14291,43 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
         init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
 }
 
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
-                                    struct rsm_map_table *rmt)
+static void init_fecn_handling(struct hfi1_devdata *dd,
+                               struct rsm_map_table *rmt)
 {
         struct rsm_rule_data rrd;
         u64 reg;
-        int i, idx, regoff, regidx;
+        int i, idx, regoff, regidx, start;
         u8 offset;
+        u32 total_cnt;
+
+        if (HFI1_CAP_IS_KSET(TID_RDMA))
+                /* Exclude context 0 */
+                start = 1;
+        else
+                start = dd->first_dyn_alloc_ctxt;
+
+        total_cnt = dd->num_rcv_contexts - start;
 
         /* there needs to be enough room in the map table */
-        if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
-                dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+        if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
+                dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
                 return;
         }
 
         /*
          * RSM will extract the destination context as an index into the
          * map table.  The destination contexts are a sequential block
-         * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
+         * in the range start...num_rcv_contexts-1 (inclusive).
          * Map entries are accessed as offset + extracted value.  Adjust
          * the added offset so this sequence can be placed anywhere in
          * the table - as long as the entries themselves do not wrap.
          * There are only enough bits in offset for the table size, so
          * start with that to allow for a "negative" offset.
          */
-        offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
-                      (int)dd->first_dyn_alloc_ctxt);
+        offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
 
-        for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
-             i < dd->num_rcv_contexts; i++, idx++) {
+        for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
+             i++, idx++) {
                 /* replace with identity mapping */
                 regoff = (idx % 8) * 8;
                 regidx = idx / 8;
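
Note: the "negative" offset works because the rule offset is an 8-bit field and
the map table has 256 entries, so offset + extracted context wraps modulo the
table size and the first covered context lands exactly on entry rmt->used. A
small self-contained sketch of that arithmetic (256-entry table assumed, values
illustrative):

#include <stdint.h>
#include <stdio.h>

#define NUM_MAP_ENTRIES 256     /* assumed RSM map table size */

int main(void)
{
        unsigned int rmt_used = 130;    /* entries already claimed, e.g. by QOS */
        unsigned int start = 1;         /* first context covered by the rule */
        unsigned int num_rcv_contexts = 20;
        uint8_t offset = (uint8_t)(NUM_MAP_ENTRIES + rmt_used - start);

        for (unsigned int ctxt = start; ctxt < num_rcv_contexts; ctxt++) {
                /* RSM adds the extracted context number to the rule offset;
                 * the sum indexes the map table modulo its size.
                 */
                unsigned int entry = (offset + ctxt) % NUM_MAP_ENTRIES;

                printf("context %2u -> map entry %u\n", ctxt, entry);
        }
        return 0;
}

With these numbers, context 1 maps to entry 130 (= rmt->used) and context 19 to
entry 148, i.e. the block sits right after the already-used entries without
wrapping.
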
@@ -14341,7 +14362,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
         /* add rule 1 */
         add_rsm_rule(dd, RSM_INS_FECN, &rrd);
 
-        rmt->used += dd->num_user_contexts;
+        rmt->used += total_cnt;
 }
 
 /* Initialize RSM for VNIC */
@@ -14428,7 +14449,7 @@ static void init_rxe(struct hfi1_devdata *dd)
         rmt = alloc_rsm_map_table(dd);
         /* set up QOS, including the QPN map table */
         init_qos(dd, rmt);
-        init_user_fecn_handling(dd, rmt);
+        init_fecn_handling(dd, rmt);
         complete_rsm_map_table(dd, rmt);
         /* record number of used rsm map entries for vnic */
         dd->vnic.rmt_start = rmt->used;
......
@@ -514,7 +514,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
          */
         do_cnp = prescan ||
                 (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
-                 opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+                 opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) ||
+                opcode == TID_OP(READ_RESP) ||
+                opcode == TID_OP(ACK);
 
         /* Call appropriate CNP handler */
         if (!ignore_fecn && do_cnp && fecn)
......
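
Note: this is the heart of the "Implement CCA for TID RDMA protocol" change:
CNP generation is extended to TID RDMA READ RESP and ACK packets so that ECN
marks on TID RDMA traffic feed congestion control just like native RC
responses. A condensed sketch of the predicate; the RC opcodes are the standard
IBTA values, while the two TID values are hypothetical stand-ins for
TID_OP(READ_RESP) and TID_OP(ACK):

#include <stdbool.h>

enum {
        RC_RDMA_READ_RESPONSE_FIRST = 0x0d,     /* IBTA RC opcode */
        RC_ATOMIC_ACKNOWLEDGE       = 0x12,     /* IBTA RC opcode */
        TID_READ_RESP               = 0xe2,     /* stand-in for TID_OP(READ_RESP) */
        TID_ACK                     = 0xe6,     /* stand-in for TID_OP(ACK) */
};

/* Mirrors the do_cnp expression above: response-side RC packets and,
 * with this series, TID RDMA READ RESP and ACK packets trigger a CNP.
 */
static bool should_send_cnp(unsigned int opcode, bool prescan)
{
        return prescan ||
               (opcode >= RC_RDMA_READ_RESPONSE_FIRST &&
                opcode <= RC_ATOMIC_ACKNOWLEDGE) ||
               opcode == TID_READ_RESP ||
               opcode == TID_ACK;
}
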
@@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
  */
 void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
 {
-        WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
-        WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
-
         kfree(rcd->groups);
         rcd->groups = NULL;
         hfi1_exp_tid_group_init(rcd);
......
@@ -900,7 +900,9 @@ void notify_error_qp(struct rvt_qp *qp)
                 if (!list_empty(&priv->s_iowait.list) &&
                     !(qp->s_flags & RVT_S_BUSY) &&
                     !(priv->s_flags & RVT_S_BUSY)) {
-                        qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+                        qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                         list_del_init(&priv->s_iowait.list);
                         priv->s_iowait.lock = NULL;
                         rvt_put_qp(qp);
......
@@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
         case OP(RDMA_READ_RESPONSE_LAST):
         case OP(RDMA_READ_RESPONSE_ONLY):
                 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-                if (e->rdma_sge.mr) {
-                        rvt_put_mr(e->rdma_sge.mr);
-                        e->rdma_sge.mr = NULL;
-                }
+                release_rdma_sge_mr(e);
                 /* FALLTHROUGH */
         case OP(ATOMIC_ACKNOWLEDGE):
                 /*
@@ -343,7 +340,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
                         break;
 
                 e->sent = 1;
-                qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+                /* Do not free e->rdma_sge until all data are received */
+                qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
                 break;
 
         case TID_OP(READ_RESP):
@@ -2643,10 +2641,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
                 len = be32_to_cpu(reth->length);
                 if (unlikely(offset + len != e->rdma_sge.sge_length))
                         goto unlock_done;
-                if (e->rdma_sge.mr) {
-                        rvt_put_mr(e->rdma_sge.mr);
-                        e->rdma_sge.mr = NULL;
-                }
+                release_rdma_sge_mr(e);
                 if (len != 0) {
                         u32 rkey = be32_to_cpu(reth->rkey);
                         u64 vaddr = get_ib_reth_vaddr(reth);
@@ -3088,10 +3083,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
                         update_ack_queue(qp, next);
                 }
                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
-                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-                        rvt_put_mr(e->rdma_sge.mr);
-                        e->rdma_sge.mr = NULL;
-                }
+                release_rdma_sge_mr(e);
                 reth = &ohdr->u.rc.reth;
                 len = be32_to_cpu(reth->length);
                 if (len) {
@@ -3166,10 +3158,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
                         update_ack_queue(qp, next);
                 }
                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
-                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-                        rvt_put_mr(e->rdma_sge.mr);
-                        e->rdma_sge.mr = NULL;
-                }
+                release_rdma_sge_mr(e);
                 /* Process OPFN special virtual address */
                 if (opfn) {
                         opfn_conn_response(qp, e, ateth);
......
@@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
         return rvt_restart_sge(ss, wqe, len);
 }
 
+static inline void release_rdma_sge_mr(struct rvt_ack_entry *e)
+{
+        if (e->rdma_sge.mr) {
+                rvt_put_mr(e->rdma_sge.mr);
+                e->rdma_sge.mr = NULL;
+        }
+}
+
 struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
                                       u8 *prev_ack, bool *scheduled);
 int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
......
@@ -76,10 +76,8 @@ struct tid_rdma_qp_params {
 struct tid_flow_state {
         u32 generation;
         u32 psn;
-        u32 r_next_psn;      /* next PSN to be received (in TID space) */
         u8 index;
         u8 last_index;
-        u8 flags;
 };
 
 enum tid_rdma_req_state {
......
@@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
                             "tid_r_comp %u pending_tid_r_segs %u " \
                             "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
                             "s_state 0x%x hw_flow_index %u generation 0x%x " \
-                            "fpsn 0x%x flow_flags 0x%x"
+                            "fpsn 0x%x"
 
 #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
                     "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \
@@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
                              "pending_tid_w_segs %u sync_pt %s " \
                              "ps_nak_psn 0x%x ps_nak_state 0x%x " \
                              "prnr_nak_state 0x%x hw_flow_index %u generation "\
-                             "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \
+                             "0x%x fpsn 0x%x resync %s" \
                              "r_next_psn_kdeth 0x%x"
 
 #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \
@@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                 __field(u32, hw_flow_index)
                 __field(u32, generation)
                 __field(u32, fpsn)
-                __field(u32, flow_flags)
         ),
         TP_fast_assign(/* assign */
                 struct hfi1_qp_priv *priv = qp->priv;
@@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                 __entry->hw_flow_index = priv->flow_state.index;
                 __entry->generation = priv->flow_state.generation;
                 __entry->fpsn = priv->flow_state.psn;
-                __entry->flow_flags = priv->flow_state.flags;
         ),
         TP_printk(/* print */
                 TID_READ_SENDER_PRN,
@@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                 __entry->s_state,
                 __entry->hw_flow_index,
                 __entry->generation,
-                __entry->fpsn,
-                __entry->flow_flags
+                __entry->fpsn
         )
 );
@@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                 __field(u32, hw_flow_index)
                 __field(u32, generation)
                 __field(u32, fpsn)
-                __field(u32, flow_flags)
                 __field(bool, resync)
                 __field(u32, r_next_psn_kdeth)
         ),
@@ -1360,7 +1356,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                 __entry->hw_flow_index = priv->flow_state.index;
                 __entry->generation = priv->flow_state.generation;
                 __entry->fpsn = priv->flow_state.psn;
-                __entry->flow_flags = priv->flow_state.flags;
                 __entry->resync = priv->resync;
                 __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth;
         ),
@@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                 __entry->hw_flow_index,
                 __entry->generation,
                 __entry->fpsn,
-                __entry->flow_flags,
                 __entry->resync ? "yes" : "no",
                 __entry->r_next_psn_kdeth
         )
......
@@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
         struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
         bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
         bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
-        u64 access_mask = ODP_READ_ALLOWED_BIT;
+        u64 access_mask;
         u64 start_idx, page_mask;
         struct ib_umem_odp *odp;
         size_t size;
@@ -607,6 +607,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
         page_shift = mr->umem->page_shift;
         page_mask = ~(BIT(page_shift) - 1);
         start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+        access_mask = ODP_READ_ALLOWED_BIT;
 
         if (prefetch && !downgrade && !mr->umem->writable) {
                 /* prefetch with write-access must
......