Commit 82771f20 authored by Doug Ledford

Merge branch 'wip/dl-for-next' into for-next

Due to concurrent work by myself and Jason, a normal fast forward merge
was not possible.  This brings in a number of hfi1 changes, mainly the
hfi1 TID RDMA support (roughly 10,000 LOC change), which was reviewed
and integrated over a period of days.
Signed-off-by: Doug Ledford <dledford@redhat.com>
parents ecb53feb 416fbc1b
@@ -24,6 +24,7 @@ hfi1-y := \
 	mad.o \
 	mmu_rb.o \
 	msix.o \
+	opfn.o \
 	pcie.o \
 	pio.o \
 	pio_copy.o \
@@ -4253,6 +4253,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 			    access_sw_pio_drain),
 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
 			    access_sw_kmem_wait),
+[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
+			    hfi1_access_sw_tid_wait),
 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
 			    access_sw_send_schedule),
 [C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
@@ -5222,6 +5224,17 @@ int is_bx(struct hfi1_devdata *dd)
 	return (chip_rev_minor & 0xF0) == 0x10;
 }
 
+/* return true if kernel urg is disabled for rcd */
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
+{
+	u64 mask;
+	u32 is = IS_RCVURGENT_START + rcd->ctxt;
+	u8 bit = is % 64;
+
+	mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
+	return !(mask & BIT_ULL(bit));
+}
+
 /*
  * Append string s to buffer buf.  Arguments curp and len are the current
  * position and remaining length, respectively.
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
 u32 hdrqempty(struct hfi1_ctxtdata *rcd);
 int is_ax(struct hfi1_devdata *dd);
 int is_bx(struct hfi1_devdata *dd);
+bool is_urg_masked(struct hfi1_ctxtdata *rcd);
 u32 read_physical_state(struct hfi1_devdata *dd);
 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
 const char *opa_lstate_name(u32 lstate);
@@ -926,6 +927,7 @@ enum {
 	C_SW_PIO_WAIT,
 	C_SW_PIO_DRAIN,
 	C_SW_KMEM_WAIT,
+	C_SW_TID_WAIT,
 	C_SW_SEND_SCHED,
 	C_SDMA_DESC_FETCHED_CNT,
 	C_SDMA_INT_CNT,
@@ -340,6 +340,10 @@ struct diag_pkt {
 
 #define HFI1_PSM_IOC_BASE_SEQ 0x0
 
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define HFI1_KDETH_BTH_SEQ_SHIFT 11
+#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
+
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {
 	return __le64_to_cpu(*((__le64 *)rbuf));
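For reference, these two constants split an expected-receive KDETH PSN into an 11-bit per-flow sequence number and a generation carried in the bits above it. A minimal sketch of that split (the helper names below are hypothetical, not part of this patch):

/* Illustrative only: an expected-receive KDETH PSN is generation | sequence. */
static inline u32 kdeth_psn_to_seq(u32 psn)
{
	return psn & HFI1_KDETH_BTH_SEQ_MASK;	/* low 11 bits: flow sequence */
}

static inline u32 kdeth_psn_to_gen(u32 psn)
{
	return psn >> HFI1_KDETH_BTH_SEQ_SHIFT;	/* remaining bits: generation */
}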
@@ -1575,13 +1575,11 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
 	return -EINVAL;
 }
 
-void handle_eflags(struct hfi1_packet *packet)
+static void show_eflags_errs(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 
-	rcv_hdrerr(rcd, rcd->ppd, packet);
-	if (rhf_err_flags(packet->rhf))
 	dd_dev_err(rcd->dd,
 		   "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
 		   rcd->ctxt, packet->rhf,
@@ -1596,6 +1594,15 @@ void handle_eflags(struct hfi1_packet *packet)
 		   rte);
 }
 
+void handle_eflags(struct hfi1_packet *packet)
+{
+	struct hfi1_ctxtdata *rcd = packet->rcd;
+
+	rcv_hdrerr(rcd, rcd->ppd, packet);
+	if (rhf_err_flags(packet->rhf))
+		show_eflags_errs(packet);
+}
+
 /*
  * The following functions are called by the interrupt handler. They are type
  * specific handlers for each packet type.
@@ -1699,11 +1706,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet)
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
 		return RHF_RCV_CONTINUE;
 
-	if (unlikely(rhf_err_flags(packet->rhf)))
-		handle_eflags(packet);
-	dd_dev_err(packet->rcd->dd,
-		   "Unhandled expected packet received. Dropping.\n");
+	if (unlikely(rhf_err_flags(packet->rhf))) {
+		struct hfi1_ctxtdata *rcd = packet->rcd;
+
+		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
+			return RHF_RCV_CONTINUE;
+	}
+
+	hfi1_kdeth_expected_rcv(packet);
 	return RHF_RCV_CONTINUE;
 }
 
@@ -1712,11 +1722,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
 	hfi1_setup_9B_packet(packet);
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
 		return RHF_RCV_CONTINUE;
 
-	if (unlikely(rhf_err_flags(packet->rhf)))
-		handle_eflags(packet);
-	dd_dev_err(packet->rcd->dd,
-		   "Unhandled eager packet received. Dropping.\n");
+	trace_hfi1_rcvhdr(packet);
+	if (unlikely(rhf_err_flags(packet->rhf))) {
+		struct hfi1_ctxtdata *rcd = packet->rcd;
+
+		show_eflags_errs(packet);
+		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
+			return RHF_RCV_CONTINUE;
+	}
+
+	hfi1_kdeth_eager_rcv(packet);
 	return RHF_RCV_CONTINUE;
 }
@@ -73,6 +73,7 @@
 #include "chip_registers.h"
 #include "common.h"
+#include "opfn.h"
 #include "verbs.h"
 #include "pio.h"
 #include "chip.h"
@@ -98,6 +99,8 @@
 #define NEIGHBOR_TYPE_HFI		0
 #define NEIGHBOR_TYPE_SWITCH	1
 
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
+
 extern unsigned long hfi1_cap_mask;
 #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
 #define HFI1_CAP_UGET_MASK(mask, cap) \
@@ -195,6 +198,14 @@ struct exp_tid_set {
 };
 
 typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+
+struct tid_queue {
+	struct list_head queue_head;
+			/* queue head for QP TID resource waiters */
+	u32 enqueue;	/* count of tid enqueues */
+	u32 dequeue;	/* count of tid dequeues */
+};
+
 struct hfi1_ctxtdata {
 	/* rcvhdrq base, needs mmap before useful */
 	void *rcvhdrq;
@@ -288,6 +299,12 @@ struct hfi1_ctxtdata {
 	/* PSM Specific fields */
 	/* lock protecting all Expected TID data */
 	struct mutex exp_mutex;
+	/* lock protecting all Expected TID data of kernel contexts */
+	spinlock_t exp_lock;
+	/* Queue for QP's waiting for HW TID flows */
+	struct tid_queue flow_queue;
+	/* Queue for QP's waiting for HW receive array entries */
+	struct tid_queue rarr_queue;
 	/* when waiting for rcv or pioavail */
 	wait_queue_head_t wait;
 	/* uuid from PSM */
@@ -320,6 +337,9 @@ struct hfi1_ctxtdata {
 	 */
 	u8 subctxt_cnt;
 
+	/* Bit mask to track free TID RDMA HW flows */
+	unsigned long flow_mask;
+	struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
 };
 
 /**
@@ -2100,7 +2120,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 		SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
 #endif
 		HFI1_PKT_USER_SC_INTEGRITY;
-	else
+	else if (ctxt_type != SC_KERNEL)
 		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
 	/* turn on send-side job key checks if !A0 */
@@ -72,7 +72,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
 
-#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
 /*
  * min buffers we want to have per context, after driver
  */
@@ -371,6 +370,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
 
 		mutex_init(&rcd->exp_mutex);
+		spin_lock_init(&rcd->exp_lock);
+		INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+		INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
 
 		hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@@ -473,6 +475,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 						    GFP_KERNEL, numa);
 			if (!rcd->opstats)
 				goto bail;
+
+			/* Initialize TID flow generations for the context */
+			hfi1_kern_init_ctxt_generations(rcd);
 		}
 
 		*context = rcd;
@@ -772,6 +777,8 @@ static void enable_chip(struct hfi1_devdata *dd)
 			rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
 		if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
 			rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+		if (HFI1_CAP_IS_KSET(TID_RDMA))
+			rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
 		hfi1_rcvctrl(dd, rcvmask, rcd);
 		sc_enable(rcd->sc);
 		hfi1_rcd_put(rcd);
@@ -927,6 +934,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 			lastfail = hfi1_create_rcvhdrq(dd, rcd);
 			if (!lastfail)
 				lastfail = hfi1_setup_eagerbufs(rcd);
+			if (!lastfail)
+				lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
 			if (lastfail) {
 				dd_dev_err(dd,
 					   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
@@ -1497,6 +1506,13 @@ static int __init hfi1_mod_init(void)
 	/* sanitize link CRC options */
 	link_crc_mask &= SUPPORTED_CRCS;
 
+	ret = opfn_init();
+	if (ret < 0) {
+		pr_err("Failed to allocate opfn_wq");
+		goto bail_dev;
+	}
+
+	hfi1_compute_tid_rdma_flow_wt();
 	/*
 	 * These must be called before the driver is registered with
 	 * the PCI subsystem.
@@ -1527,6 +1543,7 @@ module_init(hfi1_mod_init);
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	opfn_exit();
 	node_affinity_destroy_all();
 	hfi1_dbg_exit();
@@ -1581,7 +1598,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
 
 		if (rcd) {
-			hfi1_clear_tids(rcd);
+			hfi1_free_ctxt_rcv_groups(rcd);
 			hfi1_free_ctxt(rcd);
 		}
 	}
@@ -6,6 +6,9 @@
 #include "iowait.h"
 #include "trace_iowait.h"
 
+/* 1 priority == 16 starve_cnt */
+#define IOWAIT_PRIORITY_STARVE_SHIFT 4
+
 void iowait_set_flag(struct iowait *wait, u32 flag)
 {
 	trace_hfi1_iowait_set(wait, flag);
@@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
 		  uint seq,
 		  bool pkts_sent),
 		  void (*wakeup)(struct iowait *wait, int reason),
-		  void (*sdma_drained)(struct iowait *wait))
+		  void (*sdma_drained)(struct iowait *wait),
+		  void (*init_priority)(struct iowait *wait))
 {
 	int i;
 
@@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
 	wait->sleep = sleep;
 	wait->wakeup = wakeup;
 	wait->sdma_drained = sdma_drained;
+	wait->init_priority = init_priority;
 	wait->flags = 0;
 	for (i = 0; i < IOWAIT_SES; i++) {
 		wait->wait[i].iow = wait;
@@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
 	iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
 	return IOWAIT_TID_SE;
 }
+
+/**
+ * iowait_priority_update_top - update the top priority entry
+ * @w: the iowait struct
+ * @top: a pointer to the top priority entry
+ * @idx: the index of the current iowait in an array
+ * @top_idx: the array index for the iowait entry that has the top priority
+ *
+ * This function is called to compare the priority of a given
+ * iowait with the given top priority entry. The top index will
+ * be returned.
+ */
+uint iowait_priority_update_top(struct iowait *w,
+				struct iowait *top,
+				uint idx, uint top_idx)
+{
+	u8 cnt, tcnt;
+
+	/* Convert priority into starve_cnt and compare the total.*/
+	cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
+	tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+		top->starved_cnt;
+	if (cnt > tcnt)
+		return idx;
+	else
+		return top_idx;
+}
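A sketch of how these helpers are meant to be used by the wakeup paths (illustrative only; it assumes a local array waits[] of n queued iowaits, as in sc_piobufavail() and sdma_desc_avail() further down this merge). With IOWAIT_PRIORITY_STARVE_SHIFT = 4, one priority point outweighs 16 starvation counts: priority 1 + starved_cnt 3 scores (1 << 4) + 3 = 19 and beats priority 0 + starved_cnt 15.

/* Sketch only, not part of the patch: pick the waiter to wake first. */
static uint pick_top_waiter(struct iowait **waits, uint n)
{
	uint i, top_idx = 0;

	for (i = 0; i < n; i++) {
		/* recompute priority from queued txreqs and the QP state */
		iowait_get_priority(waits[i]);
		if (i)
			top_idx = iowait_priority_update_top(waits[i],
							     waits[top_idx],
							     i, top_idx);
	}
	return top_idx;	/* waits[top_idx] is woken first, the rest after */
}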
@@ -100,6 +100,7 @@ struct iowait_work {
 * @sleep: no space callback
 * @wakeup: space callback wakeup
 * @sdma_drained: sdma count drained
+ * @init_priority: callback to manipulate priority
 * @lock: lock protected head of wait queue
 * @iowork: workqueue overhead
 * @wait_dma: wait for sdma_busy == 0
@@ -109,7 +110,7 @@ struct iowait_work {
 * @tx_limit: limit for overflow queuing
 * @tx_count: number of tx entry's in tx_head'ed list
 * @flags: wait flags (one per QP)
- * @wait: SE array
+ * @wait: SE array for multiple legs
 *
 * This is to be embedded in user's state structure
 * (QP or PQ).
@@ -120,10 +121,13 @@ struct iowait_work {
 * are callbacks for the ULP to implement
 * what ever queuing/dequeuing of
 * the embedded iowait and its containing struct
- * when a resource shortage like SDMA ring space is seen.
+ * when a resource shortage like SDMA ring space
+ * or PIO credit space is seen.
 *
 * Both potentially have locks held
- * so sleeping is not allowed.
+ * so sleeping is not allowed and it is not
+ * supported to submit txreqs from the wakeup
+ * call directly because of lock conflicts.
 *
 * The wait_dma member along with the iow
 *
@@ -143,6 +147,7 @@ struct iowait {
 	);
 	void (*wakeup)(struct iowait *wait, int reason);
 	void (*sdma_drained)(struct iowait *wait);
+	void (*init_priority)(struct iowait *wait);
 	seqlock_t *lock;
 	wait_queue_head_t wait_dma;
 	wait_queue_head_t wait_pio;
@@ -152,6 +157,7 @@ struct iowait {
 	u32 tx_limit;
 	u32 tx_count;
 	u8 starved_cnt;
+	u8 priority;
 	unsigned long flags;
 	struct iowait_work wait[IOWAIT_SES];
 };
@@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
 		  uint seq,
 		  bool pkts_sent),
 		  void (*wakeup)(struct iowait *wait, int reason),
-		  void (*sdma_drained)(struct iowait *wait));
+		  void (*sdma_drained)(struct iowait *wait),
+		  void (*init_priority)(struct iowait *wait));
 
 /**
  * iowait_schedule() - schedule the default send engine work
@@ -185,6 +192,18 @@ static inline bool iowait_schedule(struct iowait *wait,
 	return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
 }
 
+/**
+ * iowait_tid_schedule - schedule the tid SE
+ * @wait: the iowait structure
+ * @wq: the work queue
+ * @cpu: the cpu
+ */
+static inline bool iowait_tid_schedule(struct iowait *wait,
+				       struct workqueue_struct *wq, int cpu)
+{
+	return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
+}
+
 /**
  * iowait_sdma_drain() - wait for DMAs to drain
  *
@@ -327,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
 		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
 				      list);
 		num_desc = tx->num_desc;
+		if (tx->flags & SDMA_TXREQ_F_VIP)
+			w->iow->priority++;
 	}
 	return num_desc;
 }
@@ -340,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
 	return num_desc;
 }
 
+static inline void iowait_update_priority(struct iowait_work *w)
+{
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&w->tx_head)) {
+		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+				      list);
+		if (tx->flags & SDMA_TXREQ_F_VIP)
+			w->iow->priority++;
+	}
+}
+
+static inline void iowait_update_all_priority(struct iowait *w)
+{
+	iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
+	iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
+}
+
+static inline void iowait_init_priority(struct iowait *w)
+{
+	w->priority = 0;
+	if (w->init_priority)
+		w->init_priority(w);
+}
+
+static inline void iowait_get_priority(struct iowait *w)
+{
+	iowait_init_priority(w);
+	iowait_update_all_priority(w);
+}
+
 /**
  * iowait_queue - Put the iowait on a wait queue
  * @pkts_sent: have some packets been sent before queuing?
@@ -356,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
 	/*
 	 * To play fair, insert the iowait at the tail of the wait queue if it
 	 * has already sent some packets; Otherwise, put it at the head.
+	 * However, if it has priority packets to send, also put it at the
+	 * head.
 	 */
-	if (pkts_sent) {
-		list_add_tail(&w->list, wait_head);
+	if (pkts_sent)
 		w->starved_cnt = 0;
-	} else {
-		list_add(&w->list, wait_head);
+	else
 		w->starved_cnt++;
-	}
+
+	if (w->priority > 0 || !pkts_sent)
+		list_add(&w->list, wait_head);
+	else
+		list_add_tail(&w->list, wait_head);
 }
 
 /**
@@ -380,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
 		w->starved_cnt = 0;
 }
 
-/**
- * iowait_starve_find_max - Find the maximum of the starve count
- * @w: the iowait struct
- * @max: a variable containing the max starve count
- * @idx: the index of the current iowait in an array
- * @max_idx: a variable containing the array index for the
- *           iowait entry that has the max starve count
- *
- * This function is called to compare the starve count of a
- * given iowait with the given max starve count. The max starve
- * count and the index will be updated if the iowait's starve
- * count is larger.
- */
-static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
-					  uint idx, uint *max_idx)
-{
-	if (w->starved_cnt > *max) {
-		*max = w->starved_cnt;
-		*max_idx = idx;
-	}
-}
+/* Update the top priority index */
+uint iowait_priority_update_top(struct iowait *w,
+				struct iowait *top,
+				uint idx, uint top_idx);
 
 /**
  * iowait_packet_queued() - determine if a packet is queued
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"
#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)
#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
struct hfi1_opfn_type {
bool (*request)(struct rvt_qp *qp, u64 *data);
bool (*response)(struct rvt_qp *qp, u64 *data);
bool (*reply)(struct rvt_qp *qp, u64 data);
void (*error)(struct rvt_qp *qp);
};
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
[STL_VERBS_EXTD_TID_RDMA] = {
.request = tid_rdma_conn_req,
.response = tid_rdma_conn_resp,
.reply = tid_rdma_conn_reply,
.error = tid_rdma_conn_error,
},
};
static struct workqueue_struct *opfn_wq;
static void opfn_schedule_conn_request(struct rvt_qp *qp);
static bool hfi1_opfn_extended(u32 bth1)
{
return !!(bth1 & IB_BTHE_E);
}
static void opfn_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_atomic_wr wr;
u16 mask, capcode;
struct hfi1_opfn_type *extd;
u64 data;
unsigned long flags;
int ret = 0;
trace_hfi1_opfn_state_conn_request(qp);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Exit if the extended bit is not set, or if nothing is requested, or
* if we have completed all requests, or if a previous request is in
* progress
*/
if (!priv->opfn.extended || !priv->opfn.requested ||
priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
goto done;
mask = priv->opfn.requested & ~priv->opfn.completed;
capcode = ilog2(mask & ~(mask - 1)) + 1;
if (capcode >= STL_VERBS_EXTD_MAX) {
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->request || !extd->request(qp, &data)) {
/*
* Either there is no handler for this capability or the request
* packet could not be generated. Either way, mark it as done so
* we don't keep attempting to complete it.
*/
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
trace_hfi1_opfn_data_conn_request(qp, capcode, data);
data = (data & ~0xf) | capcode;
memset(&wr, 0, sizeof(wr));
wr.wr.opcode = IB_WR_OPFN;
wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
wr.compare_add = data;
priv->opfn.curr = capcode; /* A new request is now in progress */
/* Drop opfn.lock before calling ib_post_send() */
spin_unlock_irqrestore(&priv->opfn.lock, flags);
ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
if (ret)
goto err;
trace_hfi1_opfn_state_conn_request(qp);
return;
err:
trace_hfi1_msg_opfn_conn_request(qp, "ib_post_send failed: ret = ",
(u64)ret);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* In case of an unexpected error return from ib_post_send
* clear opfn.curr and reschedule to try again
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
opfn_schedule_conn_request(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_send_conn_request(struct work_struct *work)
{
struct hfi1_opfn_data *od;
struct hfi1_qp_priv *qpriv;
od = container_of(work, struct hfi1_opfn_data, opfn_work);
qpriv = container_of(od, struct hfi1_qp_priv, opfn);
opfn_conn_request(qpriv->owner);
}
/*
* When QP s_lock is held in the caller, the OPFN request must be scheduled
* to a different workqueue to avoid double locking QP s_lock in call to
* ib_post_send in opfn_conn_request
*/
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
trace_hfi1_opfn_state_sched_conn_request(qp);
queue_work(opfn_wq, &priv->opfn.opfn_work);
}
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth)
{
struct hfi1_qp_priv *priv = qp->priv;
u64 data = be64_to_cpu(ateth->compare_data);
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_response(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_response(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->response) {
e->atomic_data = capcode;
return;
}
spin_lock_irqsave(&priv->opfn.lock, flags);
if (priv->opfn.completed & OPFN_CODE(capcode)) {
/*
* We are receiving a request for a feature that has already
* been negotiated. This may mean that the other side has reset
*/
priv->opfn.completed &= ~OPFN_CODE(capcode);
if (extd->error)
extd->error(qp);
}
if (extd->response(qp, &data))
priv->opfn.completed |= OPFN_CODE(capcode);
e->atomic_data = (data & ~0xf) | capcode;
trace_hfi1_opfn_state_conn_response(qp);
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_reply(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Either there is no previous request or the reply is not for the
* current request
*/
if (!priv->opfn.curr || capcode != priv->opfn.curr)
goto done;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->reply)
goto clear;
if (extd->reply(qp, data))
priv->opfn.completed |= OPFN_CODE(capcode);
clear:
/*
* Clear opfn.curr to indicate that the previous request is no longer in
* progress
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
trace_hfi1_opfn_state_conn_reply(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_error(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd = NULL;
unsigned long flags;
u16 capcode;
trace_hfi1_opfn_state_conn_error(qp);
trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
/*
* The QP has gone into the Error state. We have to invalidate all
* negotiated feature, including the one in progress (if any). The RC
* QP handling will clean the WQE for the connection request.
*/
spin_lock_irqsave(&priv->opfn.lock, flags);
while (priv->opfn.completed) {
capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
if (extd->error)
extd->error(qp);
priv->opfn.completed &= ~OPFN_CODE(capcode);
}
priv->opfn.extended = 0;
priv->opfn.requested = 0;
priv->opfn.curr = STL_VERBS_EXTD_NONE;
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
struct ib_qp *ibqp = &qp->ibqp;
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
if (attr_mask & IB_QP_RETRY_CNT)
priv->s_retry = attr->retry_cnt;
spin_lock_irqsave(&priv->opfn.lock, flags);
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
struct tid_rdma_params *local = &priv->tid_rdma.local;
if (attr_mask & IB_QP_TIMEOUT)
priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
tid_rdma_opfn_init(qp, local);
/*
* We only want to set the OPFN requested bit when the
* QP transitions to RTS.
*/
if (attr_mask & IB_QP_STATE &&
attr->qp_state == IB_QPS_RTS) {
priv->opfn.requested |= OPFN_MASK(TID_RDMA);
/*
* If the QP is transitioning to RTS and the
* opfn.completed for TID RDMA has already been
* set, the QP is being moved *back* into RTS.
* We can now renegotiate the TID RDMA
* parameters.
*/
if (priv->opfn.completed &
OPFN_MASK(TID_RDMA)) {
priv->opfn.completed &=
~OPFN_MASK(TID_RDMA);
/*
* Since the opfn.completed bit was
* already set, it is safe to assume
* that the opfn.extended is also set.
*/
opfn_schedule_conn_request(qp);
}
}
} else {
memset(local, 0, sizeof(*local));
}
}
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
HFI1_CAP_IS_KSET(OPFN)) {
priv->opfn.extended = 1;
if (qp->state == IB_QPS_RTS)
opfn_conn_request(qp);
}
}
int opfn_init(void)
{
opfn_wq = alloc_workqueue("hfi_opfn",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
WQ_MEM_RECLAIM,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
if (!opfn_wq)
return -ENOMEM;
return 0;
}
void opfn_exit(void)
{
if (opfn_wq) {
destroy_workqueue(opfn_wq);
opfn_wq = NULL;
}
}
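A worked example of the capability selection in opfn_conn_request() and opfn_conn_error() above, where mask & ~(mask - 1) isolates the lowest pending capability bit and ilog2() maps it back to an hfi1_opfn_codes value (illustrative arithmetic only, not part of the file):

/*
 * Example: requested = OPFN_MASK(TID_RDMA) = 0x1, completed = 0.
 * mask = requested & ~completed          = 0x1
 * mask & ~(mask - 1)                     = 0x1   (lowest set bit)
 * capcode = ilog2(0x1) + 1               = 1     (STL_VERBS_EXTD_TID_RDMA)
 * The hfi1_opfn_handlers[capcode] entry builds the request payload, and
 * OPFN_CODE(capcode) = BIT(0) is OR'ed into opfn.completed once negotiated.
 */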
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef _HFI1_OPFN_H
#define _HFI1_OPFN_H
/**
* DOC: Omni Path Feature Negotiation (OPFN)
*
* OPFN is a discovery protocol for Intel Omni-Path fabric that
* allows two RC QPs to negotiate a common feature that both QPs
* can support. Currently, the only OPA feature that OPFN
* supports is TID RDMA.
*
* Architecture
*
* OPFN involves the communication between two QPs on the HFI
* level on an Omni-Path fabric, and ULPs have no knowledge of
* OPFN at all.
*
* Implementation
*
* OPFN extends the existing IB RC protocol with the following
* changes:
* -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
* Header (BTH1) to indicate that the RC QP supports OPFN;
* -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
* the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
* request; The 64-bit data carried with the request/response
* contains the parameters for negotiation and will be
* defined in tid_rdma.c file;
* -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
*
* The OPFN communication will be triggered when an RC QP
* receives a request with Bit 24 of BTH1 set. The responder QP
* will then post send an OPFN request with its local
* parameters, which will be sent to the requester QP once all
* existing requests on the responder QP side have been sent.
* Once the requester QP receives the OPFN request, it will
* keep a copy of the responder QP's parameters, and return a
* response packet with its own local parameters. The responder
* QP receives the response packet and keeps a copy of the requester
* QP's parameters. After this exchange, each side has the parameters
* for both sides and therefore can select the right parameters
* for future transactions
*/
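Concretely, the encoding described above reuses existing RC fields; a short sketch of the request wire format (illustrative only, mirroring opfn_conn_request() in opfn.c; params stands for the feature parameters and is not a real symbol):

/*
 * Illustrative only: an OPFN request is posted as an IB_WR_OPFN
 * (reserved compare-swap) work request targeting HFI1_VERBS_E_ATOMIC_VADDR.
 * The low 4 bits of the 64-bit compare data carry the hfi1_opfn_codes value,
 * the upper bits carry the feature parameters:
 *
 *	wr.wr.opcode   = IB_WR_OPFN;
 *	wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
 *	wr.compare_add = (params & ~0xfULL) | STL_VERBS_EXTD_TID_RDMA;
 *
 * Bit 24 of BTH1 (IB_BTHE_E_SHIFT) is what advertises OPFN support in the
 * first place.
 */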
/* STL Verbs Extended */
#define IB_BTHE_E_SHIFT 24
#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
struct ib_atomic_eth;
enum hfi1_opfn_codes {
STL_VERBS_EXTD_NONE = 0,
STL_VERBS_EXTD_TID_RDMA,
STL_VERBS_EXTD_MAX
};
struct hfi1_opfn_data {
u8 extended;
u16 requested;
u16 completed;
enum hfi1_opfn_codes curr;
/* serialize opfn function calls */
spinlock_t lock;
struct work_struct opfn_work;
};
/* WR opcode for OPFN */
#define IB_WR_OPFN IB_WR_RESERVED3
void opfn_send_conn_request(struct work_struct *work);
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth);
void opfn_conn_reply(struct rvt_qp *qp, u64 data);
void opfn_conn_error(struct rvt_qp *qp);
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
int opfn_init(void);
void opfn_exit(void);
#endif /* _HFI1_OPFN_H */
@@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
 	struct rvt_qp *qp;
 	struct hfi1_qp_priv *priv;
 	unsigned long flags;
-	uint i, n = 0, max_idx = 0;
-	u8 max_starved_cnt = 0;
+	uint i, n = 0, top_idx = 0;
 
 	if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
 	    dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
 		if (n == ARRAY_SIZE(qps))
 			break;
 		wait = list_first_entry(list, struct iowait, list);
+		iowait_get_priority(wait);
 		qp = iowait_to_qp(wait);
 		priv = qp->priv;
 		list_del_init(&priv->s_iowait.list);
 		priv->s_iowait.lock = NULL;
-		iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
+		if (n) {
+			priv = qps[top_idx]->priv;
+			top_idx = iowait_priority_update_top(wait,
+							     &priv->s_iowait,
+							     n, top_idx);
+		}
+
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
@@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
 	}
 	write_sequnlock_irqrestore(&sc->waitlock, flags);
 
-	/* Wake up the most starved one first */
+	/* Wake up the top-priority one first */
 	if (n)
-		hfi1_qp_wakeup(qps[max_idx],
+		hfi1_qp_wakeup(qps[top_idx],
 			       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 	for (i = 0; i < n; i++)
-		if (i != max_idx)
+		if (i != top_idx)
 			hfi1_qp_wakeup(qps[i],
 				       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 }
@@ -132,6 +132,18 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
 	.qpt_support = BIT(IB_QPT_RC),
 },
 
+[IB_WR_OPFN] = {
+	.length = sizeof(struct ib_atomic_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_USE_RESERVE,
+},
+
+[IB_WR_TID_RDMA_WRITE] = {
+	.length = sizeof(struct ib_rdma_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_IGN_RNR_CNT,
+},
+
 };
 
 static void flush_list_head(struct list_head *l)
@@ -285,6 +297,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 		qp_set_16b(qp);
 	}
+
+	opfn_qp_init(qp, attr, attr_mask);
 }
 
 /**
@@ -311,6 +325,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_RC:
+		hfi1_setup_tid_rdma_wqe(qp, wqe);
 	case IB_QPT_UC:
 		if (wqe->length > 0x80000000U)
 			return -EINVAL;
@@ -422,6 +437,11 @@ static void hfi1_qp_schedule(struct rvt_qp *qp)
 		if (ret)
 			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
 	}
+	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
+		ret = hfi1_schedule_tid_send(qp);
+		if (ret)
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+	}
 }
 
 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -441,8 +461,27 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 
 void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
 {
-	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
 		qp->s_flags &= ~RVT_S_BUSY;
+		/*
+		 * If we are sending a first-leg packet from the second leg,
+		 * we need to clear the busy flag from priv->s_flags to
+		 * avoid a race condition when the qp wakes up before
+		 * the call to hfi1_verbs_send() returns to the second
+		 * leg. In that case, the second leg will terminate without
+		 * being re-scheduled, resulting in failure to send TID RDMA
+		 * WRITE DATA and TID RDMA ACK packets.
+		 */
+		if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
+			priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
+					   RVT_S_BUSY);
+			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+		}
+	} else {
+		priv->s_flags &= ~RVT_S_BUSY;
+	}
 }
 
 static int iowait_sleep(
@@ -479,6 +518,7 @@ static int iowait_sleep(
 
 			ibp->rvp.n_dmawait++;
 			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			iowait_get_priority(&priv->s_iowait);
 			iowait_queue(pkts_sent, &priv->s_iowait,
 				     &sde->dmawait);
 			priv->s_iowait.lock = &sde->waitlock;
@@ -528,6 +568,17 @@ static void iowait_sdma_drained(struct iowait *wait)
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+static void hfi1_init_priority(struct iowait *w)
+{
+	struct rvt_qp *qp = iowait_to_qp(w);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (qp->s_flags & RVT_S_ACK_PENDING)
+		w->priority++;
+	if (priv->s_flags & RVT_S_ACK_PENDING)
+		w->priority++;
+}
+
 /**
  * qp_to_sdma_engine - map a qp to a send engine
  * @qp: the QP
@@ -685,10 +736,11 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 		&priv->s_iowait,
 		1,
 		_hfi1_do_send,
-		NULL,
+		_hfi1_do_tid_send,
 		iowait_sleep,
 		iowait_wakeup,
-		iowait_sdma_drained);
+		iowait_sdma_drained,
+		hfi1_init_priority);
 
 	return priv;
 }
@@ -696,6 +748,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	hfi1_qp_priv_tid_free(rdi, qp);
 	kfree(priv->s_ahg);
 	kfree(priv);
 }
@@ -729,6 +782,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
 {
 	lockdep_assert_held(&qp->s_lock);
 	flush_iowait(qp);
+	hfi1_tid_rdma_flush_wait(qp);
 }
 
 void stop_send_queue(struct rvt_qp *qp)
@@ -736,12 +790,16 @@ void stop_send_queue(struct rvt_qp *qp)
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	iowait_cancel_work(&priv->s_iowait);
+	if (cancel_work_sync(&priv->tid_rdma.trigger_work))
+		rvt_put_qp(qp);
 }
 
 void quiesce_qp(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	hfi1_del_tid_reap_timer(qp);
+	hfi1_del_tid_retry_timer(qp);
 	iowait_sdma_drain(&priv->s_iowait);
 	qp_pio_drain(qp);
 	flush_tx_list(qp);
@@ -749,8 +807,13 @@ void quiesce_qp(struct rvt_qp *qp)
 
 void notify_qp_reset(struct rvt_qp *qp)
 {
+	hfi1_qp_kern_exp_rcv_clear_all(qp);
 	qp->r_adefered = 0;
 	clear_ahg(qp);
+
+	/* Clear any OPFN state */
+	if (qp->ibqp.qp_type == IB_QPT_RC)
+		opfn_conn_error(qp);
 }
 
 /*
@@ -832,7 +895,8 @@ void notify_error_qp(struct rvt_qp *qp)
 	if (lock) {
 		write_seqlock(lock);
 		if (!list_empty(&priv->s_iowait.list) &&
-		    !(qp->s_flags & RVT_S_BUSY)) {
+		    !(qp->s_flags & RVT_S_BUSY) &&
+		    !(priv->s_flags & RVT_S_BUSY)) {
 			qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
 			list_del_init(&priv->s_iowait.list);
 			priv->s_iowait.lock = NULL;
@@ -841,7 +905,8 @@ void notify_error_qp(struct rvt_qp *qp)
 		write_sequnlock(lock);
 	}
 
-	if (!(qp->s_flags & RVT_S_BUSY)) {
+	if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
+		qp->s_hdrwords = 0;
 		if (qp->s_rdma_mr) {
 			rvt_put_mr(qp->s_rdma_mr);
 			qp->s_rdma_mr = NULL;
@@ -63,11 +63,17 @@ extern const struct rvt_operation_params hfi1_post_parms[];
 * HFI1_S_AHG_VALID - ahg header valid on chip
 * HFI1_S_AHG_CLEAR - have send engine clear ahg state
 * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
+ * HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
+ * HFI1_S_WAIT_HALT - halt the first leg send engine
 * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
 */
 #define HFI1_S_AHG_VALID	0x80000000
 #define HFI1_S_AHG_CLEAR	0x40000000
 #define HFI1_S_WAIT_PIO_DRAIN	0x20000000
+#define HFI1_S_WAIT_TID_SPACE	0x10000000
+#define HFI1_S_WAIT_TID_RESP	0x08000000
+#define HFI1_S_WAIT_HALT	0x04000000
 #define HFI1_S_MIN_BIT_MASK	0x01000000
 
 /*
@@ -76,6 +82,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
 #define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
 #define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
 
 /*
  * Send if not busy or waiting for I/O and either
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef HFI1_RC_H
#define HFI1_RC_H
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
{
unsigned int next;
next = n + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_acked_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
len = delta_psn(psn, wqe->psn) * pmtu;
return rvt_restart_sge(ss, wqe, len);
}
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
u8 *prev_ack, bool *scheduled);
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
struct hfi1_ctxtdata *rcd);
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
#endif /* HFI1_RC_H */
@@ -250,7 +250,6 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
 				     struct ib_other_headers *ohdr,
 				     u32 bth0, u32 bth1, u32 bth2)
 {
-	bth1 |= qp->remote_qpn;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(bth1);
 	ohdr->bth[2] = cpu_to_be32(bth2);
@@ -272,13 +271,13 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
  */
 static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 					    struct ib_other_headers *ohdr,
-					    u32 bth0, u32 bth2, int middle,
+					    u32 bth0, u32 bth1, u32 bth2,
+					    int middle,
 					    struct hfi1_pkt_state *ps)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibport *ibp = ps->ibp;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	u32 bth1 = 0;
 	u32 slid;
 	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
 	u8 l4 = OPA_16B_L4_IB_LOCAL;
@@ -360,12 +359,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
  */
 static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 					   struct ib_other_headers *ohdr,
-					   u32 bth0, u32 bth2, int middle,
+					   u32 bth0, u32 bth1, u32 bth2,
+					   int middle,
 					   struct hfi1_pkt_state *ps)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibport *ibp = ps->ibp;
-	u32 bth1 = 0;
 	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
 	u16 lrh0 = HFI1_LRH_BTH;
 	u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
@@ -415,7 +414,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 
 typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
 				  struct ib_other_headers *ohdr,
-				  u32 bth0, u32 bth2, int middle,
+				  u32 bth0, u32 bth1, u32 bth2, int middle,
 				  struct hfi1_pkt_state *ps);
 
 /* We support only two types - 9B and 16B for now */
@@ -425,7 +424,7 @@ static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
 };
 
 void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle,
+			  u32 bth0, u32 bth1, u32 bth2, int middle,
 			  struct hfi1_pkt_state *ps)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
@@ -446,18 +445,21 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 		priv->s_ahg->ahgidx = 0;
 
 	/* Make the appropriate header */
-	hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps);
+	hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
+					    ps);
 }
 
 /* when sending, force a reschedule every one of these periods */
 #define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 
 /**
- * schedule_send_yield - test for a yield required for QP send engine
+ * hfi1_schedule_send_yield - test for a yield required for QP
+ *			      send engine
  * @timeout: Final time for timeout slice for jiffies
 * @qp: a pointer to QP
 * @ps: a pointer to a structure with commonly lookup values for
 *      the send engine progress
+ * @tid - true if it is the tid leg
 *
 * This routine checks if the time slice for the QP has expired
 * for RC QPs, if so an additional work entry is queued. At this
@@ -465,8 +467,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 * returns true if a yield is required, otherwise, false
 * is returned.
 */
-static bool schedule_send_yield(struct rvt_qp *qp,
-				struct hfi1_pkt_state *ps)
+bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+			      bool tid)
 {
 	ps->pkts_sent = true;
 
@@ -474,8 +476,24 @@ static bool schedule_send_yield(struct rvt_qp *qp,
 	if (!ps->in_thread ||
 	    workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
 		spin_lock_irqsave(&qp->s_lock, ps->flags);
+		if (!tid) {
 			qp->s_flags &= ~RVT_S_BUSY;
 			hfi1_schedule_send(qp);
+		} else {
+			struct hfi1_qp_priv *priv = qp->priv;
+
+			if (priv->s_flags &
+			    HFI1_S_TID_BUSY_SET) {
+				qp->s_flags &= ~RVT_S_BUSY;
+				priv->s_flags &=
+					~(HFI1_S_TID_BUSY_SET |
+					  RVT_S_BUSY);
+			} else {
+				priv->s_flags &= ~RVT_S_BUSY;
+			}
+			hfi1_schedule_tid_send(qp);
+		}
 		spin_unlock_irqrestore(&qp->s_lock, ps->flags);
 		this_cpu_inc(*ps->ppd->dd->send_schedule);
 		trace_hfi1_rc_expired_time_slice(qp, true);
@@ -576,6 +594,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (ps.s_txreq) {
+			if (priv->s_flags & HFI1_S_TID_BUSY_SET)
+				qp->s_flags |= RVT_S_BUSY;
 			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 			/*
 			 * If the packet cannot be sent now, return and
@@ -585,7 +605,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 				return;
 
 			/* allow other tasks to run */
-			if (schedule_send_yield(qp, &ps))
+			if (hfi1_schedule_send_yield(qp, &ps, false))
 				return;
 
 			spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -1747,10 +1747,9 @@ static inline u16 sdma_gethead(struct sdma_engine *sde)
 */
 static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 {
-	struct iowait *wait, *nw;
+	struct iowait *wait, *nw, *twait;
 	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
-	uint i, n = 0, seq, max_idx = 0;
-	u8 max_starved_cnt = 0;
+	uint i, n = 0, seq, tidx = 0;
 
 #ifdef CONFIG_SDMA_VERBOSITY
 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 				continue;
 			if (n == ARRAY_SIZE(waits))
 				break;
+			iowait_init_priority(wait);
 			num_desc = iowait_get_all_desc(wait);
 			if (num_desc > avail)
 				break;
 			avail -= num_desc;
-			/* Find the most starved wait memeber */
-			iowait_starve_find_max(wait, &max_starved_cnt,
-					       n, &max_idx);
+			/* Find the top-priority wait member */
+			if (n) {
+				twait = waits[tidx];
+				tidx =
+					iowait_priority_update_top(wait,
+								   twait,
+								   n,
+								   tidx);
+			}
 			list_del_init(&wait->list);
 			waits[n++] = wait;
 		}
@@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 		}
 	} while (read_seqretry(&sde->waitlock, seq));
 
-	/* Schedule the most starved one first */
+	/* Schedule the top-priority entry first */
 	if (n)
-		waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+		waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
 	for (i = 0; i < n; i++)
-		if (i != max_idx)
+		if (i != tidx)
 			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
 }
@@ -91,6 +91,7 @@ struct sdma_desc {
 #define SDMA_TXREQ_F_URGENT       0x0001
 #define SDMA_TXREQ_F_AHG_COPY     0x0002
 #define SDMA_TXREQ_F_USE_AHG      0x0004
+#define SDMA_TXREQ_F_VIP          0x0010
 
 struct sdma_txreq;
 typedef void (*callback_t)(struct sdma_txreq *, int);
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
*/ */
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace.h" #include "trace.h"
#include "exp_rcv.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr) static u8 __get_ib_hdr_len(struct ib_header *hdr)
{ {
...@@ -128,6 +129,15 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2) ...@@ -128,6 +129,15 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define IETH_PRN "ieth rkey:0x%.8x" #define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx" #define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
#define TID_WRITE_REQ_PRN "original_qp 0x%x"
#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_RESYNC_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op #define OP(transport, op) IB_OPCODE_## transport ## _ ## op
...@@ -322,6 +332,99 @@ const char *parse_everbs_hdrs( ...@@ -322,6 +332,99 @@ const char *parse_everbs_hdrs(
parse_syndrome(be32_to_cpu(eh->aeth) >> 24), parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
be32_to_cpu(eh->aeth) & IB_MSN_MASK); be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break; break;
case OP(TID_RDMA, WRITE_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_WRITE_REQ_PRN,
le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.w_req.reth.length),
be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
break;
case OP(TID_RDMA, WRITE_RESP):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_WRITE_RSP_PRN,
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
break;
case OP(TID_RDMA, WRITE_DATA_LAST):
case OP(TID_RDMA, WRITE_DATA):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
break;
case OP(TID_RDMA, READ_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_READ_REQ_PRN,
le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.r_req.reth.length),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
break;
case OP(TID_RDMA, READ_RESP):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
TID_READ_RSP_PRN,
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
break;
case OP(TID_RDMA, ACK):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_ACK_PRN,
le32_to_cpu(eh->tid_rdma.ack.kdeth0),
le32_to_cpu(eh->tid_rdma.ack.kdeth1),
be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.ack.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.ack.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
break;
case OP(TID_RDMA, RESYNC):
trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
le32_to_cpu(eh->tid_rdma.resync.kdeth0),
le32_to_cpu(eh->tid_rdma.resync.kdeth1),
be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
break;
/* aeth + atomicacketh */ /* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE): case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN, trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
...@@ -394,6 +497,21 @@ const char *print_u32_array( ...@@ -394,6 +497,21 @@ const char *print_u32_array(
return ret; return ret;
} }
u8 hfi1_trace_get_tid_ctrl(u32 ent)
{
return EXP_TID_GET(ent, CTRL);
}
u16 hfi1_trace_get_tid_len(u32 ent)
{
return EXP_TID_GET(ent, LEN);
}
u16 hfi1_trace_get_tid_idx(u32 ent)
{
return EXP_TID_GET(ent, IDX);
}
__hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT); __hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC); __hfi1_trace_fn(PROC);
......
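The trace helpers added above unpack the control, length, and index fields of a 32-bit expected-TID entry via EXP_TID_GET(). The shifts and masks in the sketch below are assumptions chosen only to make it self-contained; the authoritative values are the driver's EXP_TID_* definitions.

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed field layout of a TID entry, for illustration only. */
#define TID_LEN_SHIFT   0
#define TID_LEN_MASK    0x7ffu
#define TID_CTRL_SHIFT  20
#define TID_CTRL_MASK   0x3u
#define TID_IDX_SHIFT   22
#define TID_IDX_MASK    0x3ffu

#define TID_GET(ent, field) \
	(((ent) >> TID_##field##_SHIFT) & TID_##field##_MASK)

int main(void)
{
	uint32_t ent = (5u << TID_IDX_SHIFT) | (2u << TID_CTRL_SHIFT) | 64u;

	printf("idx=%u ctrl=%u len=%u\n",
	       (unsigned int)TID_GET(ent, IDX),
	       (unsigned int)TID_GET(ent, CTRL),
	       (unsigned int)TID_GET(ent, LEN));
	return 0;
}
```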
...@@ -63,3 +63,4 @@ __print_symbolic(etype, \ ...@@ -63,3 +63,4 @@ __print_symbolic(etype, \
#include "trace_tx.h" #include "trace_tx.h"
#include "trace_mmu.h" #include "trace_mmu.h"
#include "trace_iowait.h" #include "trace_iowait.h"
#include "trace_tid.h"
...@@ -79,6 +79,14 @@ __print_symbolic(opcode, \ ...@@ -79,6 +79,14 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \ ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
ib_opcode_name(TID_RDMA_READ_REQ), \
ib_opcode_name(TID_RDMA_READ_RESP), \
ib_opcode_name(TID_RDMA_RESYNC), \
ib_opcode_name(TID_RDMA_ACK), \
ib_opcode_name(UC_SEND_FIRST), \ ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \ ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \ ib_opcode_name(UC_SEND_LAST), \
......
...@@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, ...@@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_ARGS(qp, psn) TP_ARGS(qp, psn)
); );
DEFINE_EVENT(/* event */
hfi1_rc_template, hfi1_rc_completion,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* rc_ack */
hfi1_rc_ack_template,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, aeth)
__field(u32, psn)
__field(u8, opcode)
__field(u32, spsn)
__field(u32, lpsn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
__entry->opcode = wqe->wr.opcode;
__entry->spsn = wqe->psn;
__entry->lpsn = wqe->lpsn;
),
TP_printk(/* print */
"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
__get_str(dev),
__entry->qpn,
__entry->aeth,
__entry->psn,
__entry->opcode,
__entry->spsn,
__entry->lpsn
)
);
DEFINE_EVENT(/* do_rc_ack */
hfi1_rc_ack_template, hfi1_rc_ack_do,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe)
);
#endif /* __HFI1_TRACE_RC_H */ #endif /* __HFI1_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
......
/* /*
* Copyright(c) 2015 - 2017 Intel Corporation. * Copyright(c) 2015 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
...@@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt, ...@@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt,
) )
); );
DECLARE_EVENT_CLASS(
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(u32, rarr)
__field(u32, npages)
__field(unsigned long, va)
__field(unsigned long, pa)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->va = va;
__entry->pa = pa;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->pa,
__entry->va,
__entry->dma
)
);
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
TRACE_EVENT(
hfi1_put_tid,
TP_PROTO(struct hfi1_devdata *dd,
u32 index, u32 type, unsigned long pa, u16 order),
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
__field(unsigned long, pa);
__field(u32, index);
__field(u32, type);
__field(u16, order);
),
TP_fast_assign(
DD_DEV_ASSIGN(dd);
__entry->pa = pa;
__entry->index = index;
__entry->type = type;
__entry->order = order;
),
TP_printk("[%s] type %s pa %lx index %u order %u",
__get_str(dev),
show_tidtype(__entry->type),
__entry->pa,
__entry->index,
__entry->order
)
);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
u32 npages, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(unsigned long, va)
__field(u32, rarr)
__field(u32, npages)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->va = va;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->va,
__entry->dma
)
);
TRACE_EVENT(hfi1_mmu_invalidate, TRACE_EVENT(hfi1_mmu_invalidate,
TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type, TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
unsigned long start, unsigned long end), unsigned long start, unsigned long end),
......
...@@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, ...@@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(u32, qpn) __field(u32, qpn)
__field(u32, flags) __field(u32, flags)
__field(u32, s_flags) __field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
), ),
TP_fast_assign( TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->flags = flags; __entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num; __entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags; __entry->s_flags = qp->s_flags;
__entry->ps_flags =
((struct hfi1_qp_priv *)qp->priv)->s_flags;
__entry->iow_flags =
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
), ),
TP_printk( TP_printk(
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x", "[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
__get_str(dev), __get_str(dev),
__entry->qpn, __entry->qpn,
__entry->flags, __entry->flags,
__entry->s_flags __entry->s_flags,
__entry->ps_flags,
__entry->iow_flags
) )
); );
...@@ -838,6 +846,12 @@ DEFINE_EVENT( ...@@ -838,6 +846,12 @@ DEFINE_EVENT(
TP_ARGS(qp, flag) TP_ARGS(qp, flag)
); );
DEFINE_EVENT(/* event */
hfi1_do_send_template, hfi1_rc_do_tid_send,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
DEFINE_EVENT( DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice, hfi1_do_send_template, hfi1_rc_expired_time_slice,
TP_PROTO(struct rvt_qp *qp, bool flag), TP_PROTO(struct rvt_qp *qp, bool flag),
......
...@@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ps->s_txreq->ss = &qp->s_sge; ps->s_txreq->ss = &qp->s_sge;
ps->s_txreq->s_cur_size = len; ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps); qp->remote_qpn, mask_psn(qp->s_psn++),
middle, ps);
return 1; return 1;
done_free_tx: done_free_tx:
......
...@@ -48,7 +48,6 @@ ...@@ -48,7 +48,6 @@
*/ */
#include "hfi.h" #include "hfi.h"
#include "exp_rcv.h" #include "exp_rcv.h"
struct tid_pageset { struct tid_pageset {
......
...@@ -144,8 +144,10 @@ static int defer_packet_queue( ...@@ -144,8 +144,10 @@ static int defer_packet_queue(
*/ */
xchg(&pq->state, SDMA_PKT_Q_DEFERRED); xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&sde->waitlock); write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list)) if (list_empty(&pq->busy.list)) {
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock); write_sequnlock(&sde->waitlock);
return -EBUSY; return -EBUSY;
eagain: eagain:
...@@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, ...@@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->mm = fd->mm; pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue, iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL); activate_packet_queue, NULL, NULL);
pq->reqidx = 0; pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
...@@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags) ...@@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull), 0xffffffull),
psn = val & mask; psn = val & mask;
if (expct) if (expct)
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK); psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else else
psn = psn + frags; psn = psn + frags;
return psn & mask; return psn & mask;
......
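In set_pkt_bth_psn() above, expected (TID) traffic advances only the low KDETH BTH sequence bits by the fragment count and keeps the generation bits above them intact, hence the switch from the generic BTH_SEQ_MASK to HFI1_KDETH_BTH_SEQ_MASK. A small sketch of that arithmetic; the 11-bit sequence width is an assumption standing in for the driver's HFI1_KDETH_BTH_SEQ_* constants.

```c
#include <stdint.h>
#include <stdio.h>

#define SEQ_SHIFT 11				/* assumed sequence field width */
#define SEQ_MASK  ((1u << SEQ_SHIFT) - 1)

/* Advance a PSN by 'frags' packets, wrapping only the sequence bits. */
static uint32_t advance_expected_psn(uint32_t psn, uint32_t frags)
{
	return (psn & ~SEQ_MASK) | ((psn + frags) & SEQ_MASK);
}

int main(void)
{
	/* Sequence bits wrap: 0x745 + 0x100 -> 0x45; generation bits stay. */
	printf("0x%x\n", advance_expected_psn(0x12745, 0x100));
	return 0;
}
```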
...@@ -72,6 +72,7 @@ struct hfi1_packet; ...@@ -72,6 +72,7 @@ struct hfi1_packet;
#include "iowait.h" #include "iowait.h"
#include "tid_rdma.h" #include "tid_rdma.h"
#include "opfn.h"
#define HFI1_MAX_RDMA_ATOMIC 16 #define HFI1_MAX_RDMA_ATOMIC 16
...@@ -158,10 +159,68 @@ struct hfi1_qp_priv { ...@@ -158,10 +159,68 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */ struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */ struct send_context *s_sendcontext; /* current sendcontext */
struct hfi1_ctxtdata *rcd; /* QP's receive context */ struct hfi1_ctxtdata *rcd; /* QP's receive context */
struct page **pages; /* for TID page scan */
u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */ u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait; struct iowait s_iowait;
struct timer_list s_tid_timer; /* for timing tid wait */
struct timer_list s_tid_retry_timer; /* for timing tid ack */
struct list_head tid_wait; /* for queueing tid space */
struct hfi1_opfn_data opfn;
struct tid_flow_state flow_state;
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner; struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */ u8 hdr_type; /* 9B or 16B */
struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
atomic_t n_requests; /* # of TID RDMA requests in the */
/* queue */
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
unsigned long tid_timer_timeout_jiffies;
unsigned long tid_retry_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u8 s_state;
u8 s_retry;
u8 rnr_nak_state; /* RNR NAK state */
u8 s_nak_state;
u32 s_nak_psn;
u32 s_flags;
u32 s_tid_cur;
u32 s_tid_head;
u32 s_tid_tail;
u32 r_tid_head; /* Most recently added TID RDMA request */
u32 r_tid_tail; /* the last completed TID RDMA request */
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
u32 r_tid_alloc; /* Request for which we are allocating resources */
u32 pending_tid_w_segs; /* Num of pending tid write segments */
u32 pending_tid_w_resp; /* Num of pending tid write responses */
u32 alloc_w_segs; /* Number of segments for which write */
/* resources have been allocated for this QP */
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
u32 tid_r_comp; /* Num of tid reads completed */
u32 pending_tid_r_segs; /* Num of pending tid read segments */
u16 pkts_ps; /* packets per segment */
u8 timeout_shift; /* account for number of packets per segment */
u32 r_next_psn_kdeth;
u32 r_next_psn_kdeth_save;
u32 s_resync_psn;
u8 sync_pt; /* Set when QP reaches sync point */
u8 resync;
};
#define HFI1_QP_WQE_INVALID ((u32)-1)
struct hfi1_swqe_priv {
struct tid_rdma_request tid_req;
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
};
struct hfi1_ack_priv {
struct rvt_sge_state ss; /* used for TID WRITE RESP */
struct tid_rdma_request tid_req;
}; };
/* /*
...@@ -225,6 +284,7 @@ struct hfi1_ibdev { ...@@ -225,6 +284,7 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache; struct kmem_cache *verbs_txreq_cache;
u64 n_txwait; u64 n_txwait;
u64 n_kmem_wait; u64 n_kmem_wait;
u64 n_tidwait;
/* protect iowait lists */ /* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp; seqlock_t iowait_lock ____cacheline_aligned_in_smp;
...@@ -312,6 +372,31 @@ static inline u32 delta_psn(u32 a, u32 b) ...@@ -312,6 +372,31 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT; return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
} }
static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
{
return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
}
static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
{
return &((struct hfi1_ack_priv *)e->priv)->tid_req;
}
/*
* Compose the full software PSN for a TID RDMA flow from the flow's
* generation and the low KDETH BTH sequence bits of the given PSN.
*/
static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
{
return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
(psn & HFI1_KDETH_BTH_SEQ_MASK));
}
static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
{
return __full_flow_psn(&flow->flow_state, psn);
}
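__full_flow_psn() rebuilds the full PSN that a TID RDMA flow packet carries: the flow generation occupies the bits above the KDETH BTH sequence field and the caller's sequence bits fill the rest, all confined to the 24-bit PSN space. A worked sketch, again assuming an 11-bit sequence field purely for illustration:

```c
#include <stdint.h>
#include <stdio.h>

#define SEQ_SHIFT 11			/* assumed KDETH BTH sequence width */
#define SEQ_MASK  ((1u << SEQ_SHIFT) - 1)
#define PSN_MASK  0xffffffu		/* 24-bit PSN space */

/* Combine a flow generation with the low sequence bits of a packet PSN. */
static uint32_t example_full_flow_psn(uint32_t generation, uint32_t psn)
{
	return ((generation << SEQ_SHIFT) | (psn & SEQ_MASK)) & PSN_MASK;
}

int main(void)
{
	/* generation 3, sequence 5 -> (3 << 11) | 5 = 0x1805 */
	printf("0x%x\n", example_full_flow_psn(3, 5));
	return 0;
}
```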
struct verbs_txreq; struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx); void hfi1_put_txreq(struct verbs_txreq *tx);
...@@ -356,9 +441,12 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, ...@@ -356,9 +441,12 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords); const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle, u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps); struct hfi1_pkt_state *ps);
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid);
void _hfi1_do_send(struct work_struct *work); void _hfi1_do_send(struct work_struct *work);
void hfi1_do_send_from_rvt(struct rvt_qp *qp); void hfi1_do_send_from_rvt(struct rvt_qp *qp);
...@@ -377,6 +465,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *); ...@@ -377,6 +465,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
void hfi1_unregister_ib_device(struct hfi1_devdata *); void hfi1_unregister_ib_device(struct hfi1_devdata *);
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
void hfi1_ib_rcv(struct hfi1_packet *packet); void hfi1_ib_rcv(struct hfi1_packet *packet);
void hfi1_16B_rcv(struct hfi1_packet *packet); void hfi1_16B_rcv(struct hfi1_packet *packet);
...@@ -394,6 +486,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr) ...@@ -394,6 +486,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ); return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
} }
void hfi1_wait_kmem(struct rvt_qp *qp);
static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
struct rvt_swqe *wqe,
enum ib_wc_status status)
{
trdma_clean_swqe(qp, wqe);
rvt_send_complete(qp, wqe, status);
}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[]; extern const u8 hdr_len_by_opcode[];
......
...@@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, ...@@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
tx->txreq.num_desc = 0; tx->txreq.num_desc = 0;
/* Set the header type */ /* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type; tx->phdr.hdr.hdr_type = priv->hdr_type;
tx->txreq.flags = 0;
return tx; return tx;
} }
......
...@@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, ...@@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
} }
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
if (list_empty(&vnic_sdma->wait.list)) if (list_empty(&vnic_sdma->wait.list)) {
iowait_get_priority(wait->iow);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait); iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
}
write_sequnlock(&sde->waitlock); write_sequnlock(&sde->waitlock);
return -EBUSY; return -EBUSY;
} }
...@@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) ...@@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
iowait_init(&vnic_sdma->wait, 0, NULL, NULL, iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep, hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL); hfi1_vnic_sdma_wakeup, NULL, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i]; vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd; vnic_sdma->dd = vinfo->dd;
vnic_sdma->vinfo = vinfo; vnic_sdma->vinfo = vinfo;
......
...@@ -45,12 +45,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, ...@@ -45,12 +45,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 len; u32 len;
len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu; len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
ss->sge = wqe->sg_list[0]; return rvt_restart_sge(ss, wqe, len);
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
rvt_skip_sge(ss, len, false);
return wqe->length - len;
} }
/** /**
......
...@@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, ...@@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_mig_state = IB_MIG_MIGRATED; qp->s_mig_state = IB_MIG_MIGRATED;
qp->r_head_ack_queue = 0; qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0; qp->s_tail_ack_queue = 0;
qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0; qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) { if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0; qp->r_rq.wq->head = 0;
...@@ -1642,11 +1643,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp) ...@@ -1642,11 +1643,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
kref_put(&qp->ip->ref, rvt_release_mmap_info); kref_put(&qp->ip->ref, rvt_release_mmap_info);
else else
vfree(qp->r_rq.wq); vfree(qp->r_rq.wq);
vfree(qp->s_wq);
rdi->driver_f.qp_priv_free(rdi, qp); rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue); kfree(qp->s_ack_queue);
rdma_destroy_ah_attr(&qp->remote_ah_attr); rdma_destroy_ah_attr(&qp->remote_ah_attr);
rdma_destroy_ah_attr(&qp->alt_ah_attr); rdma_destroy_ah_attr(&qp->alt_ah_attr);
vfree(qp->s_wq);
kfree(qp); kfree(qp);
return 0; return 0;
} }
...@@ -2393,11 +2394,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth) ...@@ -2393,11 +2394,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
} }
/* /*
* rvt_add_retry_timer - add/start a retry timer * rvt_add_retry_timer_ext - add/start a retry timer
* @qp - the QP * @qp - the QP
* @shift - timeout shift to wait for multiple packets
* add a retry timer on the QP * add a retry timer on the QP
*/ */
void rvt_add_retry_timer(struct rvt_qp *qp) void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
{ {
struct ib_qp *ibqp = &qp->ibqp; struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
...@@ -2405,11 +2407,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp) ...@@ -2405,11 +2407,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock); lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER; qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */ /* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies + qp->s_timer.expires = jiffies + rdi->busy_jiffies +
rdi->busy_jiffies; (qp->timeout_jiffies << shift);
add_timer(&qp->s_timer); add_timer(&qp->s_timer);
} }
EXPORT_SYMBOL(rvt_add_retry_timer); EXPORT_SYMBOL(rvt_add_retry_timer_ext);
/** /**
* rvt_add_rnr_timer - add/start an rnr timer * rvt_add_rnr_timer - add/start an rnr timer
......
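rvt_add_retry_timer_ext() lets a driver stretch the retry timeout when a single request spans many packets: the usual 4.096 usec * (1 << qp->timeout) interval is additionally left-shifted by the caller's shift. A numeric sketch follows; jiffies conversion and busy_jiffies are omitted, and the function and values are illustrative only.

```c
#include <stdio.h>

/* 4.096 usec expressed in nanoseconds, scaled by the IB timeout exponent
 * and then widened by 'shift' to cover several packets per segment. */
static unsigned long long retry_timeout_ns(unsigned int timeout,
					   unsigned int shift)
{
	return (4096ull << timeout) << shift;
}

int main(void)
{
	printf("%llu ns\n", retry_timeout_ns(14, 0));	/* ~67 ms per packet    */
	printf("%llu ns\n", retry_timeout_ns(14, 3));	/* ~537 ms for 8 packets */
	return 0;
}
```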
...@@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth) ...@@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
} }
} }
EXPORT_SYMBOL(rvt_get_credit); EXPORT_SYMBOL(rvt_get_credit);
/* rvt_restart_sge - rewind the sge state for a wqe */
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
{
ss->sge = wqe->sg_list[0];
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
EXPORT_SYMBOL(rvt_restart_sge);
/* /*
* Copyright(c) 2016 Intel Corporation. * Copyright(c) 2016 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
...@@ -100,6 +100,8 @@ struct ib_atomic_eth { ...@@ -100,6 +100,8 @@ struct ib_atomic_eth {
__be64 compare_data; /* potentially unaligned */ __be64 compare_data; /* potentially unaligned */
} __packed; } __packed;
#include <rdma/tid_rdma_defs.h>
union ib_ehdrs { union ib_ehdrs {
struct { struct {
__be32 deth[2]; __be32 deth[2];
...@@ -117,6 +119,16 @@ union ib_ehdrs { ...@@ -117,6 +119,16 @@ union ib_ehdrs {
__be32 aeth; __be32 aeth;
__be32 ieth; __be32 ieth;
struct ib_atomic_eth atomic_eth; struct ib_atomic_eth atomic_eth;
/* TID RDMA headers */
union {
struct tid_rdma_read_req r_req;
struct tid_rdma_read_resp r_rsp;
struct tid_rdma_write_req w_req;
struct tid_rdma_write_resp w_rsp;
struct tid_rdma_write_data w_data;
struct tid_rdma_resync resync;
struct tid_rdma_ack ack;
} tid_rdma;
} __packed; } __packed;
struct ib_other_headers { struct ib_other_headers {
......
...@@ -182,6 +182,7 @@ struct rvt_driver_params { ...@@ -182,6 +182,7 @@ struct rvt_driver_params {
u32 max_mad_size; u32 max_mad_size;
u8 qos_shift; u8 qos_shift;
u8 max_rdma_atomic; u8 max_rdma_atomic;
u8 extra_rdma_atomic;
u8 reserved_operations; u8 reserved_operations;
}; };
...@@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) ...@@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
*/ */
static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
{ {
return rdi->dparms.max_rdma_atomic + 1; return rdi->dparms.max_rdma_atomic +
rdi->dparms.extra_rdma_atomic + 1;
}
static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
{
return rdi->dparms.max_rdma_atomic +
rdi->dparms.extra_rdma_atomic;
} }
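rvt_size_atomic() now sizes the responder's s_ack_queue as the advertised RDMA read/atomic limit plus whatever extra entries the driver reserves (hfi1 uses the extra slots for TID RDMA), while rvt_max_atomic() keeps its additional +1. A tiny illustration with assumed parameter values:

```c
#include <stdio.h>

struct dparms_example {
	unsigned char max_rdma_atomic;
	unsigned char extra_rdma_atomic;
};

static unsigned int size_atomic(const struct dparms_example *p)
{
	return p->max_rdma_atomic + p->extra_rdma_atomic;	/* queue entries */
}

static unsigned int max_atomic(const struct dparms_example *p)
{
	return size_atomic(p) + 1;				/* +1 spare slot */
}

int main(void)
{
	struct dparms_example p = { .max_rdma_atomic = 16, .extra_rdma_atomic = 1 };

	printf("size=%u max=%u\n", size_atomic(&p), max_atomic(&p));	/* 17 18 */
	return 0;
}
```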
/* /*
...@@ -566,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, ...@@ -566,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
/** /**
* rvt_mod_retry_timer - mod a retry timer * rvt_mod_retry_timer_ext - mod a retry timer
* @qp - the QP * @qp - the QP
* @shift - timeout shift to wait for multiple packets
* Modify a potentially already running retry timer * Modify a potentially already running retry timer
*/ */
static inline void rvt_mod_retry_timer(struct rvt_qp *qp) static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
{ {
struct ib_qp *ibqp = &qp->ibqp; struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
...@@ -576,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp) ...@@ -576,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock); lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER; qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */ /* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
rdi->busy_jiffies); (qp->timeout_jiffies << shift));
}
static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
{
return rvt_mod_retry_timer_ext(qp, 0);
} }
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
......
...@@ -174,6 +174,7 @@ struct rvt_swqe { ...@@ -174,6 +174,7 @@ struct rvt_swqe {
u32 lpsn; /* last packet sequence number */ u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */ u32 ssn; /* send sequence number */
u32 length; /* total length of data in sg_list */ u32 length; /* total length of data in sg_list */
void *priv; /* driver dependent field */
struct rvt_sge sg_list[0]; struct rvt_sge sg_list[0];
}; };
...@@ -235,6 +236,7 @@ struct rvt_ack_entry { ...@@ -235,6 +236,7 @@ struct rvt_ack_entry {
u32 lpsn; u32 lpsn;
u8 opcode; u8 opcode;
u8 sent; u8 sent;
void *priv;
}; };
#define RC_QP_SCALING_INTERVAL 5 #define RC_QP_SCALING_INTERVAL 5
...@@ -244,6 +246,7 @@ struct rvt_ack_entry { ...@@ -244,6 +246,7 @@ struct rvt_ack_entry {
#define RVT_OPERATION_ATOMIC_SGE 0x00000004 #define RVT_OPERATION_ATOMIC_SGE 0x00000004
#define RVT_OPERATION_LOCAL 0x00000008 #define RVT_OPERATION_LOCAL 0x00000008
#define RVT_OPERATION_USE_RESERVE 0x00000010 #define RVT_OPERATION_USE_RESERVE 0x00000010
#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
...@@ -373,6 +376,7 @@ struct rvt_qp { ...@@ -373,6 +376,7 @@ struct rvt_qp {
u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_acked_ack_queue; /* index into s_ack_queue[] */
struct rvt_sge_state s_ack_rdma_sge; struct rvt_sge_state s_ack_rdma_sge;
struct timer_list s_timer; struct timer_list s_timer;
...@@ -628,6 +632,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp); ...@@ -628,6 +632,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp);
*/ */
void rvt_get_credit(struct rvt_qp *qp, u32 aeth); void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
/**
* rvt_restart_sge - rewind the sge state for a wqe
* @ss: the sge state pointer
* @wqe: the wqe to rewind
* @len: the data length from the start of the wqe in bytes
*
* Returns the remaining data length.
*/
u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
/** /**
* @qp - the qp pair * @qp - the qp pair
* @len - the length * @len - the length
...@@ -676,7 +690,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t); ...@@ -676,7 +690,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth); void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_del_timers_sync(struct rvt_qp *qp);
void rvt_stop_rc_timers(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp);
void rvt_add_retry_timer(struct rvt_qp *qp); void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
static inline void rvt_add_retry_timer(struct rvt_qp *qp)
{
rvt_add_retry_timer_ext(qp, 0);
}
void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
void *data, u32 length, void *data, u32 length,
......
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef TID_RDMA_DEFS_H
#define TID_RDMA_DEFS_H
#include <rdma/ib_pack.h>
struct tid_rdma_read_req {
__le32 kdeth0;
__le32 kdeth1;
struct ib_reth reth;
__be32 tid_flow_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
struct tid_rdma_read_resp {
__le32 kdeth0;
__le32 kdeth1;
__be32 aeth;
__be32 reserved[4];
__be32 verbs_psn;
__be32 verbs_qp;
};
struct tid_rdma_write_req {
__le32 kdeth0;
__le32 kdeth1;
struct ib_reth reth;
__be32 reserved[2];
__be32 verbs_qp;
};
struct tid_rdma_write_resp {
__le32 kdeth0;
__le32 kdeth1;
__be32 aeth;
__be32 reserved[3];
__be32 tid_flow_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
struct tid_rdma_write_data {
__le32 kdeth0;
__le32 kdeth1;
__be32 reserved[6];
__be32 verbs_qp;
};
struct tid_rdma_resync {
__le32 kdeth0;
__le32 kdeth1;
__be32 reserved[6];
__be32 verbs_qp;
};
struct tid_rdma_ack {
__le32 kdeth0;
__le32 kdeth1;
__be32 aeth;
__be32 reserved[2];
__be32 tid_flow_psn;
__be32 verbs_psn;
__be32 tid_flow_qp;
__be32 verbs_qp;
};
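Laid out back to back as defined above (two KDETH dwords, then the op-specific fields padded with reserved dwords), every TID RDMA extension header works out to the same 36 bytes, which keeps the opcode-to-header-length table uniform. Below is a standalone mirror of two of the layouts, packed explicitly so the size check holds regardless of compiler padding; the struct names are local to the sketch.

```c
#include <stdint.h>

struct reth_mirror {			/* vaddr + rkey + length, 16 bytes */
	uint64_t vaddr;
	uint32_t rkey;
	uint32_t length;
} __attribute__((packed));

struct tid_read_req_mirror {		/* matches struct tid_rdma_read_req */
	uint32_t kdeth0, kdeth1;
	struct reth_mirror reth;
	uint32_t tid_flow_psn, tid_flow_qp, verbs_qp;
} __attribute__((packed));

struct tid_ack_mirror {			/* matches struct tid_rdma_ack */
	uint32_t kdeth0, kdeth1, aeth;
	uint32_t reserved[2];
	uint32_t tid_flow_psn, verbs_psn, tid_flow_qp, verbs_qp;
} __attribute__((packed));

_Static_assert(sizeof(struct tid_read_req_mirror) == 36, "36-byte header");
_Static_assert(sizeof(struct tid_ack_mirror) == 36, "36-byte header");

int main(void) { return 0; }
```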
/*
* TID RDMA Opcodes
*/
#define IB_OPCODE_TID_RDMA 0xe0
enum {
IB_OPCODE_WRITE_REQ = 0x0,
IB_OPCODE_WRITE_RESP = 0x1,
IB_OPCODE_WRITE_DATA = 0x2,
IB_OPCODE_WRITE_DATA_LAST = 0x3,
IB_OPCODE_READ_REQ = 0x4,
IB_OPCODE_READ_RESP = 0x5,
IB_OPCODE_RESYNC = 0x6,
IB_OPCODE_ACK = 0x7,
IB_OPCODE(TID_RDMA, WRITE_REQ),
IB_OPCODE(TID_RDMA, WRITE_RESP),
IB_OPCODE(TID_RDMA, WRITE_DATA),
IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
IB_OPCODE(TID_RDMA, READ_REQ),
IB_OPCODE(TID_RDMA, READ_RESP),
IB_OPCODE(TID_RDMA, RESYNC),
IB_OPCODE(TID_RDMA, ACK),
};
#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
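IB_OPCODE() in rdma/ib_pack.h composes each full opcode as the transport base plus the per-operation value, so with the 0xe0 TID RDMA base the eight opcodes land in 0xe0..0xe7 and TID_OP(ACK), for example, expands to IB_OPCODE_TID_RDMA_ACK == 0xe7. A tiny check of that arithmetic; the macro below mimics, rather than reuses, the ib_pack.h definition.

```c
#include <assert.h>

/* Mimics the transport-base-plus-op composition done by IB_OPCODE(). */
#define EXAMPLE_OPCODE(base, op) ((base) + (op))

int main(void)
{
	assert(EXAMPLE_OPCODE(0xe0, 0x0) == 0xe0);	/* TID_RDMA WRITE_REQ */
	assert(EXAMPLE_OPCODE(0xe0, 0x7) == 0xe7);	/* TID_RDMA ACK       */
	return 0;
}
```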
/*
* Define TID RDMA specific WR opcodes. The ib_wr_opcode
* enum already provides some reserved values for use by
* low level drivers. Two of those are used but renamed
* to be more descriptive.
*/
#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2
#endif /* TID_RDMA_DEFS_H */