Commit c30ee8b7 authored by David S. Miller

Merge branch 'rds-more-fixes'

Santosh Shilimkar says:

====================
RDS: Few more fixes

As indicated in the earlier series [1], this is a follow-up series which
addresses a few issues in the RDS FMR code. With [1] and this series
applied, I can now run many parallel threads with multiple sockets
carrying N x N traffic. The stress tests have survived overnight runs.

[1] https://lkml.org/lkml/2015/8/22/127
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents b8766e4e 27241214
@@ -366,6 +366,7 @@ void rds_ib_exit(void)
 	rds_ib_sysctl_exit();
 	rds_ib_recv_exit();
 	rds_trans_unregister(&rds_ib_transport);
+	rds_ib_fmr_exit();
 }
 
 struct rds_transport rds_ib_transport = {
@@ -401,10 +402,14 @@ int rds_ib_init(void)
 
 	INIT_LIST_HEAD(&rds_ib_devices);
 
-	ret = ib_register_client(&rds_ib_client);
+	ret = rds_ib_fmr_init();
 	if (ret)
 		goto out;
 
+	ret = ib_register_client(&rds_ib_client);
+	if (ret)
+		goto out_fmr_exit;
+
 	ret = rds_ib_sysctl_init();
 	if (ret)
 		goto out_ibreg;
@@ -427,6 +432,8 @@ int rds_ib_init(void)
 	rds_ib_sysctl_exit();
 out_ibreg:
 	rds_ib_unregister_client();
+out_fmr_exit:
+	rds_ib_fmr_exit();
 out:
 	return ret;
 }
...
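For readers less familiar with kernel init conventions: the hunks above follow the standard goto-unwind idiom, where each setup step gets a cleanup label and a failure jumps to the label that undoes exactly the steps that already succeeded, in reverse order. A self-contained sketch with illustrative names (step_a/step_b merely stand in for rds_ib_fmr_init()/ib_register_client(); none of this code is from the patch):

#include <stdio.h>

static int step_a_init(void) { puts("a: init"); return 0; }
static void step_a_exit(void) { puts("a: exit"); }
static int step_b_init(void) { puts("b: init (fails)"); return -1; }

static int demo_init(void)
{
	int ret;

	ret = step_a_init();	/* plays the role of rds_ib_fmr_init() */
	if (ret)
		goto out;

	ret = step_b_init();	/* plays the role of ib_register_client() */
	if (ret)
		goto out_a;	/* unwind only what already succeeded */

	return 0;

out_a:
	step_a_exit();		/* undo in reverse order of setup */
out:
	return ret;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}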
@@ -313,6 +313,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
+int rds_ib_fmr_init(void);
+void rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
 int rds_ib_recv_init(void);
...
@@ -83,6 +83,25 @@ struct rds_ib_mr_pool {
 	struct ib_fmr_attr	fmr_attr;
 };
 
+struct workqueue_struct *rds_ib_fmr_wq;
+
+int rds_ib_fmr_init(void)
+{
+	rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
+	if (!rds_ib_fmr_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* By the time this is called all the IB devices should have been torn down and
+ * had their pools freed. As each pool is freed its work struct is waited on,
+ * so the pool flushing work queue should be idle by the time we get here.
+ */
+void rds_ib_fmr_exit(void)
+{
+	destroy_workqueue(rds_ib_fmr_wq);
+}
+
 static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
 static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
 static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
@@ -341,8 +360,6 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 		goto out_no_cigar;
 	}
 
-	memset(ibmr, 0, sizeof(*ibmr));
-
 	ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
 			(IB_ACCESS_LOCAL_WRITE |
 			 IB_ACCESS_REMOTE_READ |
@@ -528,11 +545,13 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
 /*
  * given an llist of mrs, put them all into the list_head for more processing
  */
-static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
+static unsigned int llist_append_to_list(struct llist_head *llist,
+					 struct list_head *list)
 {
 	struct rds_ib_mr *ibmr;
 	struct llist_node *node;
 	struct llist_node *next;
+	unsigned int count = 0;
 
 	node = llist_del_all(llist);
 	while (node) {
@@ -540,7 +559,9 @@ static void llist_append_to_list(struct llist_head *llist, struct list_head *lis
 		ibmr = llist_entry(node, struct rds_ib_mr, llnode);
 		list_add_tail(&ibmr->unmap_list, list);
 		node = next;
+		count++;
 	}
+	return count;
 }
 
 /*
@@ -581,7 +602,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(fmr_list);
 	unsigned long unpinned = 0;
-	unsigned int nfreed = 0, ncleaned = 0, free_goal;
+	unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
 	int ret = 0;
 
 	rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
@@ -623,8 +644,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	/* Get the list of all MRs to be dropped. Ordering matters -
 	 * we want to put drop_list ahead of free_list.
 	 */
-	llist_append_to_list(&pool->drop_list, &unmap_list);
-	llist_append_to_list(&pool->free_list, &unmap_list);
+	dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
+	dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
 	if (free_all)
 		llist_append_to_list(&pool->clean_list, &unmap_list);
 
@@ -652,7 +673,6 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 			kfree(ibmr);
 			nfreed++;
 		}
-		ncleaned++;
 	}
 
 	if (!list_empty(&unmap_list)) {
@@ -678,7 +698,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	}
 
 	atomic_sub(unpinned, &pool->free_pinned);
-	atomic_sub(ncleaned, &pool->dirty_count);
+	atomic_sub(dirty_to_clean, &pool->dirty_count);
 	atomic_sub(nfreed, &pool->item_count);
 
 out:
@@ -715,16 +735,18 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 
 	/* If we've pinned too many pages, request a flush */
 	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
-	    atomic_read(&pool->dirty_count) >= pool->max_items / 10)
-		schedule_delayed_work(&pool->flush_worker, 10);
+	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
+		queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
 
 	if (invalidate) {
 		if (likely(!in_interrupt())) {
 			rds_ib_flush_mr_pool(pool, 0, NULL);
 		} else {
 			/* We get here if the user created a MR marked
-			 * as use_once and invalidate at the same time. */
-			schedule_delayed_work(&pool->flush_worker, 10);
+			 * as use_once and invalidate at the same time.
+			 */
+			queue_delayed_work(rds_ib_fmr_wq,
+					   &pool->flush_worker, 10);
 		}
 	}
 
...
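The dedicated rds_ib_fmr_wq replaces schedule_delayed_work() on the shared system workqueue, so FMR pool flushing neither starves nor is starved by unrelated system work, and teardown can reason about a single queue being idle. A minimal module-shaped sketch of the same lifecycle, with hypothetical demo_* names (a sketch of the pattern, not the RDS code itself):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;	/* hypothetical, like rds_ib_fmr_wq */
static struct delayed_work demo_flush;

static void demo_flush_worker(struct work_struct *work)
{
	/* pool flushing would run here, on demo_wq's worker */
}

static int __init demo_init(void)
{
	demo_wq = create_workqueue("demo_flushd");
	if (!demo_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_flush, demo_flush_worker);
	queue_delayed_work(demo_wq, &demo_flush, 10);	/* 10 jiffies, as in the patch */
	return 0;
}

static void __exit demo_exit(void)
{
	/* ensure nothing is queued or running before tearing the queue down */
	cancel_delayed_work_sync(&demo_flush);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");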
@@ -1102,9 +1102,10 @@ int rds_ib_recv_init(void)
 	rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
 					sizeof(struct rds_page_frag),
 					0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!rds_ib_frag_slab)
+	if (!rds_ib_frag_slab) {
 		kmem_cache_destroy(rds_ib_incoming_slab);
-	else
+		rds_ib_incoming_slab = NULL;
+	} else
 		ret = 0;
 out:
 	return ret;
...
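The ib_recv.c fix pairs kmem_cache_destroy() with resetting the pointer, so a later cleanup or retry that tests rds_ib_incoming_slab cannot destroy an already-freed cache. A sketch of the idiom with hypothetical demo_* names and sizes (assumed, not from the patch):

#include <linux/slab.h>

static struct kmem_cache *demo_incoming_slab;	/* hypothetical caches */
static struct kmem_cache *demo_frag_slab;

static int demo_caches_init(void)
{
	demo_incoming_slab = kmem_cache_create("demo_incoming", 128, 0,
					       SLAB_HWCACHE_ALIGN, NULL);
	if (!demo_incoming_slab)
		return -ENOMEM;

	demo_frag_slab = kmem_cache_create("demo_frag", 64, 0,
					   SLAB_HWCACHE_ALIGN, NULL);
	if (!demo_frag_slab) {
		kmem_cache_destroy(demo_incoming_slab);
		demo_incoming_slab = NULL;	/* no stale pointer for later paths */
		return -ENOMEM;
	}
	return 0;
}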
@@ -435,9 +435,10 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 
 	/* If the MR was marked as invalidate, this will
 	 * trigger an async flush. */
-	if (zot_me)
+	if (zot_me) {
 		rds_destroy_mr(mr);
-	rds_mr_put(mr);
+		rds_mr_put(mr);
+	}
 }
 
 void rds_rdma_free_op(struct rm_rdma_op *ro)
...
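The rdma.c hunk repairs a reference imbalance: rds_mr_put() now runs only on the path that actually removed the MR from the socket's tree (zot_me), so a racing caller that did not perform the removal no longer drops a reference it never owned. A hypothetical userspace miniature of that rule (illustrative only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;	/* holds the shared table's reference */
};

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

/* Only the caller that actually removed the object from the shared table
 * owns the table's reference and may drop it. */
static void unuse(struct obj *o, bool removed_from_table)
{
	if (removed_from_table)
		obj_put(o);
	/* a racing caller that lost the removal drops nothing */
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);
	unuse(o, true);		/* the remover drops the ref; o is freed */
	return 0;
}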