Commit 525dfa2c authored by David S. Miller's avatar David S. Miller

Merge branch 'mlx5-odp'

Saeed Mahameed says:

====================
Mellanox mlx5 core and ODP updates 2017-01-01

The following eleven patches mainly come from Artemy Kovalyov
who expanded mlx5 on-demand-paging (ODP) support. In addition
there are three cleanup patches which don't change any functionality,
but are needed to align codebase prior accepting other patches.

Memory region (MR) in IB can be huge and ODP (on-demand paging)
technique allows to use unpinned memory, which can be consumed and
released on demand. This allows to applications do not pin down
the underlying physical pages of the address space, and save from them
need to track the validity of the mappings.

Rather, the HCA requests the latest translations from the OS when pages
are not present, and the OS invalidates translations which are no longer
valid due to either non-present pages or mapping changes.

In existing ODP implementation applications is needed to register
memory buffers for communication, though registered memory regions
need not have valid mappings at registration time.

This patch set performs the following steps to expand
current ODP implementation:

1. It refactors UMR to support large regions, by introducing generic
   function to perform HCA translation table modifications. This
   function supports both atomic and process contexts and is not limited
   by number of modified entries.

   This function allows to enable reallocated memory regions of
   arbitrary size, so adding MR cache buckets to support up to 16GB MRs.

2. It changes page fault event format and refactor page faults logic
   together with addition of atomic support.

3. It prepares mlx5 core code to support implicit registration with
   simplified and relaxed semantics.

   Implicit ODP semantics allows to applications provide special memory
   key that represents their complete address space. Thus all IO accesses
   referencing to this key (with proper access rights associated with the key)
   wouldn't need not register any virtual address range.

Thanks,
        Artemy, Ilya and Leon

v1->v2:
  - Don't use 'inline' in .c files
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 85eb018f aa8e08d2
......@@ -672,17 +672,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
uhw->outlen)) {
resp.mlx5_ib_support_multi_pkt_send_wqes =
MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
resp.response_length +=
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
if (field_avail(typeof(resp), reserved, uhw->outlen))
resp.response_length += sizeof(resp.reserved);
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
......@@ -706,6 +695,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.packet_pacing_caps);
}
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
uhw->outlen)) {
resp.mlx5_ib_support_multi_pkt_send_wqes =
MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
resp.response_length +=
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
if (field_avail(typeof(resp), reserved, uhw->outlen))
resp.response_length += sizeof(resp.reserved);
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
......@@ -1112,11 +1112,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
context->upd_xlt_page = __get_free_page(GFP_KERNEL);
if (!context->upd_xlt_page) {
err = -ENOMEM;
goto out_uars;
}
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
goto out_uars;
goto out_page;
}
INIT_LIST_HEAD(&context->vma_private_list);
......@@ -1168,6 +1175,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
......@@ -1195,6 +1205,8 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
free_page(context->upd_xlt_page);
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
......@@ -3307,6 +3319,9 @@ static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
.pfault = mlx5_ib_pfault,
#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
......@@ -3317,25 +3332,14 @@ static int __init mlx5_ib_init(void)
if (deprecated_prof_sel != 2)
pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
err = mlx5_ib_odp_init();
if (err)
return err;
err = mlx5_register_interface(&mlx5_ib_interface);
if (err)
goto clean_odp;
return err;
clean_odp:
mlx5_ib_odp_cleanup();
return err;
}
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
mlx5_ib_odp_cleanup();
}
module_init(mlx5_ib_init);
......
......@@ -159,7 +159,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
unsigned long umem_page_shift = ilog2(umem->page_size);
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
int i, k;
int i, k, idx;
u64 cur = 0;
u64 base;
int len;
......@@ -185,18 +185,36 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
for (k = 0; k < len; k++) {
/* Skip elements below offset */
if (i + len < offset << shift) {
i += len;
continue;
}
/* Skip pages below offset */
if (i < offset << shift) {
k = (offset << shift) - i;
i = offset << shift;
} else {
k = 0;
}
for (; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << umem_page_shift);
cur |= access_flags;
idx = (i >> shift) - offset;
pas[i >> shift] = cpu_to_be64(cur);
pas[idx] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
i >> shift, be64_to_cpu(pas[i >> shift]));
} else
mlx5_ib_dbg(dev, "=====> 0x%llx\n",
base + (k << umem_page_shift));
i >> shift, be64_to_cpu(pas[idx]));
}
i++;
/* Stop after num_pages reached */
if (i >> shift >= offset + num_pages)
return;
}
}
}
......
......@@ -125,6 +125,10 @@ struct mlx5_ib_ucontext {
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
unsigned long upd_xlt_page;
/* protect ODP/KSM */
struct mutex upd_xlt_page_mutex;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
......@@ -174,13 +178,12 @@ struct mlx5_ib_flow_db {
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
......@@ -190,6 +193,16 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
#define MLX5_IB_UPD_XLT_ADDR BIT(3)
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
......@@ -264,29 +277,6 @@ struct mlx5_ib_rwq_ind_table {
u32 rqtn;
};
/*
* Connect-IB can trigger up to four concurrent pagefaults
* per-QP.
*/
enum mlx5_ib_pagefault_context {
MLX5_IB_PAGEFAULT_RESPONDER_READ,
MLX5_IB_PAGEFAULT_REQUESTOR_READ,
MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
MLX5_IB_PAGEFAULT_CONTEXTS
};
static inline enum mlx5_ib_pagefault_context
mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
{
return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
}
struct mlx5_ib_pfault {
struct work_struct work;
struct mlx5_pagefault mpfault;
};
struct mlx5_ib_ubuffer {
struct ib_umem *umem;
int buf_size;
......@@ -372,20 +362,6 @@ struct mlx5_ib_qp {
/* Store signature errors */
bool signature_en;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
* A flag that is true for QP's that are in a state that doesn't
* allow page faults, and shouldn't schedule any more faults.
*/
int disable_page_faults;
/*
* The disable_page_faults_lock protects a QP's disable_page_faults
* field, allowing for a thread to atomically check whether the QP
* allows page faults, and if so schedule a page fault.
*/
spinlock_t disable_page_faults_lock;
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
......@@ -414,13 +390,11 @@ enum mlx5_ib_qp_flags {
struct mlx5_umr_wr {
struct ib_send_wr wr;
union {
u64 virt_addr;
u64 offset;
} target;
u64 virt_addr;
u64 offset;
struct ib_pd *pd;
unsigned int page_shift;
unsigned int npages;
unsigned int xlt_size;
u64 length;
int access_flags;
u32 mkey;
......@@ -634,6 +608,7 @@ struct mlx5_ib_dev {
int fill_delay;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
u64 odp_max_size;
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
......@@ -787,8 +762,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
int npages, int zap);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
......@@ -857,18 +832,13 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
extern struct workqueue_struct *mlx5_ib_page_fault_wq;
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault);
void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
......@@ -877,13 +847,10 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return;
}
static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
......
This diff is collapsed.
This diff is collapsed.
......@@ -1526,9 +1526,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
......@@ -1923,7 +1920,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
......@@ -2823,16 +2819,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (mlx5_st < 0)
goto out;
/* If moving to a reset or error state, we must disable page faults on
* this QP and flush all current page faults. Otherwise a stale page
* fault may attempt to work on this QP after it is reset and moved
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
(new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new])
goto out;
......@@ -2864,10 +2850,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (err)
goto out;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
......@@ -3080,9 +3062,10 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
static __be16 get_klm_octo(int npages)
static u64 get_xlt_octo(u64 bytes)
{
return cpu_to_be16(ALIGN(npages, 8) / 2);
return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
MLX5_IB_UMR_OCTOWORD;
}
static __be64 frwr_mkey_mask(void)
......@@ -3127,18 +3110,14 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
struct mlx5_ib_mr *mr)
struct mlx5_ib_mr *mr)
{
int ndescs = mr->ndescs;
int size = mr->ndescs * mr->desc_size;
memset(umr, 0, sizeof(*umr));
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
/* KLMs take twice the size of MTTs */
ndescs *= 2;
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
umr->klm_octowords = get_klm_octo(ndescs);
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
......@@ -3149,37 +3128,17 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
static __be64 get_umr_reg_mr_mask(int atomic)
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR |
MLX5_MKEY_MASK_PD |
MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
if (atomic)
result |= MLX5_MKEY_MASK_A;
return cpu_to_be64(result);
}
static __be64 get_umr_unreg_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_mtt_mask(void)
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
......@@ -3194,23 +3153,22 @@ static __be64 get_umr_update_translation_mask(void)
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR |
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(void)
static __be64 get_umr_update_access_mask(int atomic)
{
u64 result;
result = MLX5_MKEY_MASK_LW |
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
MLX5_MKEY_MASK_A |
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
MLX5_MKEY_MASK_RW;
if (atomic)
result |= MLX5_MKEY_MASK_A;
return cpu_to_be64(result);
}
......@@ -3219,9 +3177,7 @@ static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD |
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
......@@ -3238,24 +3194,24 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
else
umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
umr->klm_octowords = get_klm_octo(umrwr->npages);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
umr->mkey_mask = get_umr_update_mtt_mask();
umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
umr->mkey_mask |= get_umr_update_access_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
umr->mkey_mask |= get_umr_update_pd_mask();
if (!umr->mkey_mask)
umr->mkey_mask = get_umr_reg_mr_mask(atomic);
} else {
umr->mkey_mask = get_umr_unreg_mr_mask();
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
u64 offset = get_xlt_octo(umrwr->offset);
umr->xlt_offset = cpu_to_be16(offset & 0xffff);
umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
umr->mkey_mask |= get_umr_update_access_mask(atomic);
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
umr->mkey_mask |= get_umr_enable_mr_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
umr->mkey_mask |= get_umr_disable_mr_mask();
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
......@@ -3303,17 +3259,17 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
seg->status = MLX5_MKEY_STATUS_FREE;
return;
}
seg->flags = convert_access(umrwr->access_flags);
if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
if (umrwr->pd)
seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
}
if (umrwr->pd)
seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
seg->start_addr = cpu_to_be64(umrwr->virt_addr);
seg->len = cpu_to_be64(umrwr->length);
seg->log2_page_size = umrwr->page_shift;
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
......@@ -3611,7 +3567,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
struct ib_sig_handover_wr *wr, u32 nelements,
struct ib_sig_handover_wr *wr, u32 size,
u32 length, u32 pdn)
{
struct ib_mr *sig_mr = wr->sig_mr;
......@@ -3626,17 +3582,17 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
u32 nelements)
u32 size)
{
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
umr->klm_octowords = get_klm_octo(nelements);
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
......@@ -3648,7 +3604,7 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
u32 pdn = get_pd(qp)->pdn;
u32 klm_oct_size;
u32 xlt_size;
int region_len, ret;
if (unlikely(wr->wr.num_sge != 1) ||
......@@ -3670,15 +3626,15 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
klm_oct_size = wr->prot ? 3 : 1;
xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
set_sig_umr_segment(*seg, klm_oct_size);
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
......@@ -4559,14 +4515,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
* Wait for any outstanding page faults, in case the user frees memory
* based upon this query's result.
*/
flush_workqueue(mlx5_ib_page_fault_wq);
#endif
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
......
......@@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (dev_ctx->intf->pfault) {
if (priv->pfault) {
mlx5_core_err(dev, "multiple page fault handlers not supported");
} else {
priv->pfault_ctx = dev_ctx->context;
priv->pfault = dev_ctx->intf->pfault;
}
}
#endif
spin_unlock_irq(&priv->ctx_lock);
} else {
kfree(dev_ctx);
......@@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx)
return;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
spin_lock_irq(&priv->ctx_lock);
if (priv->pfault == dev_ctx->intf->pfault)
priv->pfault = NULL;
spin_unlock_irq(&priv->ctx_lock);
synchronize_srcu(&priv->pfault_srcu);
#endif
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
......@@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
struct mlx5_pagefault *pfault)
{
struct mlx5_priv *priv = &dev->priv;
int srcu_idx;
srcu_idx = srcu_read_lock(&priv->pfault_srcu);
if (priv->pfault)
priv->pfault(dev, priv->pfault_ctx, pfault);
srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
}
#endif
void mlx5_dev_list_lock(void)
{
mutex_lock(&mlx5_intf_mutex);
......
......@@ -396,7 +396,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq,
cseg->imm = rq->mkey_be;
ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
ucseg->klm_octowords =
ucseg->xlt_octowords =
cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
ucseg->bsf_octowords =
cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
......
......@@ -152,6 +152,26 @@ static struct mlx5_profile profile[] = {
.size = 8,
.limit = 4
},
.mr_cache[16] = {
.size = 8,
.limit = 4
},
.mr_cache[17] = {
.size = 8,
.limit = 4
},
.mr_cache[18] = {
.size = 8,
.limit = 4
},
.mr_cache[19] = {
.size = 4,
.limit = 2
},
.mr_cache[20] = {
.size = 4,
.limit = 2
},
},
};
......@@ -733,7 +753,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
err = mlx5_create_map_eq(dev, eq,
i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
name, &dev->priv.uuari.uars[0]);
name, &dev->priv.uuari.uars[0],
MLX5_EQ_TYPE_COMP);
if (err) {
kfree(eq);
goto clean;
......@@ -1275,10 +1296,19 @@ static int init_one(struct pci_dev *pdev,
spin_lock_init(&priv->ctx_lock);
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&priv->pfault_srcu);
if (err) {
dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
err);
goto clean_dev;
}
#endif
err = mlx5_pci_init(dev, priv);
if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
goto clean_dev;
goto clean_srcu;
}
err = mlx5_health_init(dev);
......@@ -1312,7 +1342,11 @@ static int init_one(struct pci_dev *pdev,
mlx5_health_cleanup(dev);
close_pci:
mlx5_pci_close(dev, priv);
clean_srcu:
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
#endif
pci_set_drvdata(pdev, NULL);
devlink_free(devlink);
......@@ -1337,6 +1371,9 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&priv->pfault_srcu);
#endif
pci_set_drvdata(pdev, NULL);
devlink_free(devlink);
}
......
......@@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
struct mlx5_pagefault *pfault);
void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
......
......@@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
mlx5_core_put_rsc(common);
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
{
struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
struct mlx5_core_qp *qp =
container_of(common, struct mlx5_core_qp, common);
struct mlx5_pagefault pfault;
if (!qp) {
mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
qpn);
return;
}
pfault.event_subtype = eqe->sub_type;
pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
pfault.bytes_committed = be32_to_cpu(
pf_eqe->bytes_committed);
mlx5_core_dbg(dev,
"PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
eqe->sub_type, pfault.flags);
switch (eqe->sub_type) {
case MLX5_PFAULT_SUBTYPE_RDMA:
/* RDMA based event */
pfault.rdma.r_key =
be32_to_cpu(pf_eqe->rdma.r_key);
pfault.rdma.packet_size =
be16_to_cpu(pf_eqe->rdma.packet_length);
pfault.rdma.rdma_op_len =
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
pfault.rdma.rdma_va =
be64_to_cpu(pf_eqe->rdma.rdma_va);
mlx5_core_dbg(dev,
"PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
qpn, pfault.rdma.r_key);
mlx5_core_dbg(dev,
"PAGE_FAULT: rdma_op_len: 0x%08x,\n",
pfault.rdma.rdma_op_len);
mlx5_core_dbg(dev,
"PAGE_FAULT: rdma_va: 0x%016llx,\n",
pfault.rdma.rdma_va);
mlx5_core_dbg(dev,
"PAGE_FAULT: bytes_committed: 0x%06x\n",
pfault.bytes_committed);
break;
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
pfault.wqe.wqe_index =
be16_to_cpu(pf_eqe->wqe.wqe_index);
pfault.wqe.packet_size =
be16_to_cpu(pf_eqe->wqe.packet_length);
mlx5_core_dbg(dev,
"PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
qpn, pfault.wqe.wqe_index);
mlx5_core_dbg(dev,
"PAGE_FAULT: bytes_committed: 0x%06x\n",
pfault.bytes_committed);
break;
default:
mlx5_core_warn(dev,
"Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
eqe->sub_type, qpn);
/* Unsupported page faults should still be resolved by the
* page fault handler
*/
}
if (qp->pfault_handler) {
qp->pfault_handler(qp, &pfault);
} else {
mlx5_core_err(dev,
"ODP event for QP %08x, without a fault handler in QP\n",
qpn);
/* Page fault will remain unresolved. QP will hang until it is
* destroyed
*/
}
mlx5_core_put_rsc(common);
}
#endif
static int create_qprqsq_common(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
int rsc_type)
......@@ -506,31 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
}
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 flags, int error)
{
u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
MLX5_SET(page_fault_resume_in, in, opcode,
MLX5_CMD_OP_PAGE_FAULT_RESUME);
MLX5_SET(page_fault_resume_in, in, qpn, qpn);
if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR)
MLX5_SET(page_fault_resume_in, in, req_res, 1);
if (flags & MLX5_PAGE_FAULT_RESUME_WRITE)
MLX5_SET(page_fault_resume_in, in, read_write, 1);
if (flags & MLX5_PAGE_FAULT_RESUME_RDMA)
MLX5_SET(page_fault_resume_in, in, rdma, 1);
if (error)
MLX5_SET(page_fault_resume_in, in, error, 1);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq)
{
......
......@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
__be16 wqe_index;
u16 reserved2;
__be16 packet_length;
u8 reserved3[12];
__be32 token;
u8 reserved4[8];
__be32 pftype_wq;
} __packed wqe;
struct {
__be32 r_key;
......@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
__be16 packet_length;
__be32 rdma_op_len;
__be64 rdma_va;
__be32 pftype_token;
} __packed rdma;
} __packed;
__be32 flags_qpn;
} __packed;
struct mlx5_eqe_vport_change {
......
......@@ -42,6 +42,7 @@
#include <linux/vmalloc.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h>
#include <linux/mlx5/device.h>
......@@ -83,6 +84,7 @@ enum {
MLX5_EQ_VEC_PAGES = 0,
MLX5_EQ_VEC_CMD = 1,
MLX5_EQ_VEC_ASYNC = 2,
MLX5_EQ_VEC_PFAULT = 3,
MLX5_EQ_VEC_COMP_BASE,
};
......@@ -178,6 +180,14 @@ enum mlx5_port_status {
MLX5_PORT_DOWN = 2,
};
enum mlx5_eq_type {
MLX5_EQ_TYPE_COMP,
MLX5_EQ_TYPE_ASYNC,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
MLX5_EQ_TYPE_PF,
#endif
};
struct mlx5_uuar_info {
struct mlx5_uar *uars;
int num_uars;
......@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
spinlock_t lock;
};
struct mlx5_eq_pagefault {
struct work_struct work;
/* Pagefaults lock */
spinlock_t lock;
struct workqueue_struct *wq;
mempool_t *pool;
};
struct mlx5_eq {
struct mlx5_core_dev *dev;
__be32 __iomem *doorbell;
......@@ -346,7 +364,13 @@ struct mlx5_eq {
struct list_head list;
int index;
struct mlx5_rsc_debug *dbg;
struct mlx5_eq_tasklet tasklet_ctx;
enum mlx5_eq_type type;
union {
struct mlx5_eq_tasklet tasklet_ctx;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq_pagefault pf_ctx;
#endif
};
};
struct mlx5_core_psv {
......@@ -370,13 +394,21 @@ struct mlx5_core_sig_ctx {
u32 sigerr_count;
};
enum {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
};
struct mlx5_core_mkey {
u64 iova;
u64 size;
u32 key;
u32 pd;
u32 type;
};
#define MLX5_24BIT_MASK ((1 << 24) - 1)
enum mlx5_res_type {
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
......@@ -411,6 +443,9 @@ struct mlx5_eq_table {
struct mlx5_eq pages_eq;
struct mlx5_eq async_eq;
struct mlx5_eq cmd_eq;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq pfault_eq;
#endif
int num_comp_vectors;
/* protect EQs list
*/
......@@ -497,6 +532,7 @@ struct mlx5_fc_stats {
struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_pagefault;
struct mlx5_rl_entry {
u32 rate;
......@@ -601,6 +637,14 @@ struct mlx5_priv {
struct mlx5_rl_table rl_table;
struct mlx5_port_module_event_stats pme_stats;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void *pfault_ctx;
struct srcu_struct pfault_srcu;
#endif
};
enum mlx5_device_state {
......@@ -619,6 +663,50 @@ enum mlx5_pci_status {
MLX5_PCI_STATUS_ENABLED,
};
enum mlx5_pagefault_type_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u32 token;
u8 event_subtype;
u8 type;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* Number of resource holding WQE, depends on type.
*/
u32 wq_num;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
struct mlx5_eq *eq;
struct work_struct work;
};
struct mlx5_td {
struct list_head tirs_list;
u32 tdn;
......@@ -879,15 +967,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name, struct mlx5_uar *uar);
int nent, u64 mask, const char *name,
struct mlx5_uar *uar, enum mlx5_eq_type type);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
int mlx5_start_eqs(struct mlx5_core_dev *dev);
int mlx5_stop_eqs(struct mlx5_core_dev *dev);
......@@ -926,6 +1012,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
u32 wq_num, u8 type, int error);
#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
......@@ -959,7 +1049,7 @@ enum {
};
enum {
MAX_MR_CACHE_ENTRIES = 16,
MAX_MR_CACHE_ENTRIES = 21,
};
enum {
......@@ -974,6 +1064,9 @@ struct mlx5_interface {
void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param);
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void * (*get_dev)(void *context);
int protocol;
struct list_head list;
......
......@@ -328,7 +328,7 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 receive[0x1];
u8 write[0x1];
u8 read[0x1];
u8 reserved_at_4[0x1];
u8 atomic[0x1];
u8 srq_receive[0x1];
u8 reserved_at_6[0x1a];
};
......@@ -782,11 +782,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_eq[0x4];
u8 max_indirection[0x8];
u8 reserved_at_108[0x1];
u8 fixed_buffer_size[0x1];
u8 log_max_mrw_sz[0x7];
u8 reserved_at_110[0x2];
u8 log_max_bsf_list_size[0x6];
u8 reserved_at_118[0x2];
u8 umr_extended_translation_offset[0x1];
u8 null_mkey[0x1];
u8 log_max_klm_list_size[0x6];
u8 reserved_at_120[0xa];
......@@ -826,9 +827,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_1a9[0x2];
u8 local_ca_ack_delay[0x5];
u8 port_module_event[0x1];
u8 reserved_at_1b0[0x1];
u8 reserved_at_1b1[0x1];
u8 ports_check[0x1];
u8 reserved_at_1b2[0x1];
u8 reserved_at_1b3[0x1];
u8 disable_link_up[0x1];
u8 beacon_led[0x1];
u8 port_type[0x2];
......@@ -858,7 +859,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 compact_address_vector[0x1];
u8 striding_rq[0x1];
u8 reserved_at_201[0x2];
u8 reserved_at_202[0x2];
u8 ipoib_basic_offloads[0x1];
u8 reserved_at_205[0xa];
u8 drain_sigerr[0x1];
......@@ -1009,10 +1010,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 rndv_offload_rc[0x1];
u8 rndv_offload_dc[0x1];
u8 log_tag_matching_list_sz[0x5];
u8 reserved_at_5e8[0x3];
u8 reserved_at_5f8[0x3];
u8 log_max_xrq[0x5];
u8 reserved_at_5f0[0x200];
u8 reserved_at_600[0x200];
};
enum mlx5_flow_destination_type {
......@@ -2569,6 +2570,7 @@ enum {
MLX5_MKC_ACCESS_MODE_PA = 0x0,
MLX5_MKC_ACCESS_MODE_MTT = 0x1,
MLX5_MKC_ACCESS_MODE_KLMS = 0x2,
MLX5_MKC_ACCESS_MODE_KSM = 0x3,
};
struct mlx5_ifc_mkc_bits {
......@@ -3677,6 +3679,10 @@ struct mlx5_ifc_query_special_contexts_out_bits {
u8 dump_fill_mkey[0x20];
u8 resd_lkey[0x20];
u8 null_mkey[0x20];
u8 reserved_at_a0[0x60];
};
struct mlx5_ifc_query_special_contexts_in_bits {
......@@ -4769,12 +4775,11 @@ struct mlx5_ifc_page_fault_resume_in_bits {
u8 error[0x1];
u8 reserved_at_41[0x4];
u8 rdma[0x1];
u8 read_write[0x1];
u8 req_res[0x1];
u8 qpn[0x18];
u8 page_fault_type[0x3];
u8 wq_number[0x18];
u8 reserved_at_60[0x20];
u8 reserved_at_60[0x8];
u8 token[0x18];
};
struct mlx5_ifc_nop_out_bits {
......
......@@ -50,9 +50,6 @@
#define MLX5_BSF_APPTAG_ESCAPE 0x1
#define MLX5_BSF_APPREF_ESCAPE 0x2
#define MLX5_QPN_BITS 24
#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX5_QP_OPTPAR_RRE = 1 << 1,
......@@ -215,6 +212,7 @@ struct mlx5_wqe_ctrl_seg {
#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
#define MLX5_WQE_AV_EXT 0x80000000
enum {
MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4,
......@@ -245,6 +243,23 @@ struct mlx5_wqe_masked_atomic_seg {
__be64 compare_mask;
};
struct mlx5_base_av {
union {
struct {
__be32 qkey;
__be32 reserved;
} qkey;
__be64 dc_key;
} key;
__be32 dqp_dct;
u8 stat_rate_sl;
u8 fl_mlid;
union {
__be16 rlid;
__be16 udp_sport;
};
};
struct mlx5_av {
union {
struct {
......@@ -292,10 +307,14 @@ struct mlx5_wqe_data_seg {
struct mlx5_wqe_umr_ctrl_seg {
u8 flags;
u8 rsvd0[3];
__be16 klm_octowords;
__be16 bsf_octowords;
__be16 xlt_octowords;
union {
__be16 xlt_offset;
__be16 bsf_octowords;
};
__be64 mkey_mask;
u8 rsvd1[32];
__be32 xlt_offset_47_16;
u8 rsvd1[28];
};
struct mlx5_seg_set_psv {
......@@ -389,6 +408,10 @@ struct mlx5_bsf {
struct mlx5_bsf_inl m_inl;
};
struct mlx5_mtt {
__be64 ptag;
};
struct mlx5_klm {
__be32 bcount;
__be32 key;
......@@ -410,46 +433,9 @@ struct mlx5_stride_block_ctrl_seg {
__be16 num_entries;
};
enum mlx5_pagefault_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u8 event_subtype;
enum mlx5_pagefault_flags flags;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
};
struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int);
void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
int qpn;
struct mlx5_rsc_debug *dbg;
int pid;
......@@ -549,10 +535,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 context, int error);
#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq);
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment