Commit 2a600f14 authored by Mike Marciniszyn, committed by Roland Dreier

IB/qib: RDMA lkey/rkey validation is inefficient for large MRs

The current code loops during rkey/lkey validation to isolate the MR
for the RDMA, which is expensive when the current operation is inside
a very large memory region.

This fix optimizes rkey/lkey validation routines for user memory
regions and fast memory regions.  The MR entry can be isolated by
shifts/mods instead of looping.  The existing loop is preserved for
phys memory regions for now.
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 7c3edd3f
...@@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, ...@@ -158,31 +158,47 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->sge_length = sge->length; isge->sge_length = sge->length;
isge->m = 0; isge->m = 0;
isge->n = 0; isge->n = 0;
spin_unlock_irqrestore(&rkt->lock, flags);
goto ok; goto ok;
} }
mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]; mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != sge->lkey || if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
mr->pd != &pd->ibpd)) mr->pd != &pd->ibpd))
goto bail; goto bail;
atomic_inc(&mr->refcount);
spin_unlock_irqrestore(&rkt->lock, flags);
off = sge->addr - mr->user_base; off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base || if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length || off + sge->length > mr->length ||
(mr->access_flags & acc) != acc)) (mr->access_flags & acc) != acc))
goto bail; return ret;
off += mr->offset; off += mr->offset;
m = 0; if (mr->page_shift) {
n = 0; /*
while (off >= mr->map[m]->segs[n].length) { page sizes are uniform power of 2 so no loop is necessary
off -= mr->map[m]->segs[n].length; entries_spanned_by_off is the number of times the loop below
n++; would have executed.
if (n >= QIB_SEGSZ) { */
m++; size_t entries_spanned_by_off;
n = 0;
entries_spanned_by_off = off >> mr->page_shift;
off -= (entries_spanned_by_off << mr->page_shift);
m = entries_spanned_by_off/QIB_SEGSZ;
n = entries_spanned_by_off%QIB_SEGSZ;
} else {
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= QIB_SEGSZ) {
m++;
n = 0;
}
} }
} }
atomic_inc(&mr->refcount);
isge->mr = mr; isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off; isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off; isge->length = mr->map[m]->segs[n].length - off;
...@@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, ...@@ -191,6 +207,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
isge->n = n; isge->n = n;
ok: ok:
ret = 1; ret = 1;
return ret;
bail: bail:
spin_unlock_irqrestore(&rkt->lock, flags); spin_unlock_irqrestore(&rkt->lock, flags);
return ret; return ret;
...@@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, ...@@ -237,30 +254,46 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
sge->sge_length = len; sge->sge_length = len;
sge->m = 0; sge->m = 0;
sge->n = 0; sge->n = 0;
spin_unlock_irqrestore(&rkt->lock, flags);
goto ok; goto ok;
} }
mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]; mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail; goto bail;
atomic_inc(&mr->refcount);
spin_unlock_irqrestore(&rkt->lock, flags);
off = vaddr - mr->iova; off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length || if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0)) (mr->access_flags & acc) == 0))
goto bail; return ret;
off += mr->offset; off += mr->offset;
m = 0; if (mr->page_shift) {
n = 0; /*
while (off >= mr->map[m]->segs[n].length) { page sizes are uniform power of 2 so no loop is necessary
off -= mr->map[m]->segs[n].length; entries_spanned_by_off is the number of times the loop below
n++; would have executed.
if (n >= QIB_SEGSZ) { */
m++; size_t entries_spanned_by_off;
n = 0;
entries_spanned_by_off = off >> mr->page_shift;
off -= (entries_spanned_by_off << mr->page_shift);
m = entries_spanned_by_off/QIB_SEGSZ;
n = entries_spanned_by_off%QIB_SEGSZ;
} else {
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= QIB_SEGSZ) {
m++;
n = 0;
}
} }
} }
atomic_inc(&mr->refcount);
sge->mr = mr; sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off; sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off; sge->length = mr->map[m]->segs[n].length - off;
...@@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, ...@@ -269,6 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
sge->n = n; sge->n = n;
ok: ok:
ret = 1; ret = 1;
return ret;
bail: bail:
spin_unlock_irqrestore(&rkt->lock, flags); spin_unlock_irqrestore(&rkt->lock, flags);
return ret; return ret;
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
/* Fast memory region */ /* Fast memory region */
struct qib_fmr { struct qib_fmr {
struct ib_fmr ibfmr; struct ib_fmr ibfmr;
u8 page_shift;
struct qib_mregion mr; /* must be last */ struct qib_mregion mr; /* must be last */
}; };
...@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) ...@@ -107,6 +106,7 @@ static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
goto bail; goto bail;
} }
mr->mr.mapsz = m; mr->mr.mapsz = m;
mr->mr.page_shift = 0;
mr->mr.max_segs = count; mr->mr.max_segs = count;
/* /*
...@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, ...@@ -231,6 +231,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags; mr->mr.access_flags = mr_access_flags;
mr->umem = umem; mr->umem = umem;
if (is_power_of_2(umem->page_size))
mr->mr.page_shift = ilog2(umem->page_size);
m = 0; m = 0;
n = 0; n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list) { list_for_each_entry(chunk, &umem->chunk_list, list) {
...@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, ...@@ -390,7 +392,7 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
fmr->mr.offset = 0; fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags; fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages; fmr->mr.max_segs = fmr_attr->max_pages;
fmr->page_shift = fmr_attr->page_shift; fmr->mr.page_shift = fmr_attr->page_shift;
atomic_set(&fmr->mr.refcount, 0); atomic_set(&fmr->mr.refcount, 0);
ret = &fmr->ibfmr; ret = &fmr->ibfmr;
...@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, ...@@ -437,7 +439,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
spin_lock_irqsave(&rkt->lock, flags); spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova; fmr->mr.user_base = iova;
fmr->mr.iova = iova; fmr->mr.iova = iova;
ps = 1 << fmr->page_shift; ps = 1 << fmr->mr.page_shift;
fmr->mr.length = list_len * ps; fmr->mr.length = list_len * ps;
m = 0; m = 0;
n = 0; n = 0;
......
...@@ -301,6 +301,7 @@ struct qib_mregion { ...@@ -301,6 +301,7 @@ struct qib_mregion {
int access_flags; int access_flags;
u32 max_segs; /* number of qib_segs in all the arrays */ u32 max_segs; /* number of qib_segs in all the arrays */
u32 mapsz; /* size of the map array */ u32 mapsz; /* size of the map array */
u8 page_shift; /* 0 - non unform/non powerof2 sizes */
atomic_t refcount; atomic_t refcount;
struct qib_segarray *map[0]; /* the segments */ struct qib_segarray *map[0]; /* the segments */
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment