Commit fd47c2f9 authored by Leon Romanovsky's avatar Leon Romanovsky Committed by Jason Gunthorpe

RDMA/restrack: Convert internal DB from hash to XArray

The additions of .doit callbacks posses new access pattern to the resource
entries by some user visible index. Back then, the legacy DB was
implemented as hash because per-index access wasn't needed and XArray
wasn't accepted yet.

Acceptance of XArray together with per-index access requires the refresh
of DB implementation.
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 5f8f5499
...@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, ...@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
int start = cb->args[0]; int start = cb->args[0];
bool has_cap_net_admin; bool has_cap_net_admin;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
unsigned long id;
u32 index, port = 0; u32 index, port = 0;
bool filled = false; bool filled = false;
...@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb, ...@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb,
has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
down_read(&device->res.rwsem); down_read(&device->res.rwsem);
hash_for_each_possible(device->res.hash, res, node, res_type) { /*
* FIXME: if the skip ahead is something common this loop should
* use xas_for_each & xas_pause to optimize, we can have a lot of
* objects.
*/
xa_for_each(&device->res.xa[res_type], id, res) {
if (idx < start) if (idx < start)
goto next; goto next;
...@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb, ...@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
rdma_restrack_put(res); rdma_restrack_put(res);
if (ret == -EMSGSIZE) if (ret == -EMSGSIZE)
/*
* There is a chance to optimize here.
* It can be done by using list_prepare_entry
* and list_for_each_entry_continue afterwards.
*/
break; break;
if (ret) if (ret)
goto res_err; goto res_err;
......
...@@ -12,6 +12,28 @@ ...@@ -12,6 +12,28 @@
#include "cma_priv.h" #include "cma_priv.h"
static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
u32 *next)
{
int err;
*id = *next;
if (*next == U32_MAX)
*id = 0;
xa_lock(xa);
err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
if (err && *next != U32_MAX) {
*id = 0;
err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
}
if (!err)
*next = *id + 1;
xa_unlock(xa);
return err;
}
/** /**
* rdma_restrack_init() - initialize resource tracking * rdma_restrack_init() - initialize resource tracking
* @dev: IB device * @dev: IB device
...@@ -19,6 +41,10 @@ ...@@ -19,6 +41,10 @@
void rdma_restrack_init(struct ib_device *dev) void rdma_restrack_init(struct ib_device *dev)
{ {
struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_root *res = &dev->res;
int i;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
init_rwsem(&res->rwsem); init_rwsem(&res->rwsem);
} }
...@@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev) ...@@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev)
struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e; struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN]; char buf[TASK_COMM_LEN];
bool found = false;
const char *owner; const char *owner;
int bkt; int i;
if (hash_empty(res->hash)) for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
return; if (!xa_empty(&res->xa[i])) {
unsigned long index;
dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE); if (!found) {
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n"); pr_err("restrack: %s", CUT_HERE);
hash_for_each(res->hash, bkt, e, node) { dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
if (rdma_is_kernel_res(e)) { }
owner = e->kern_name; xa_for_each(&res->xa[i], index, e) {
} else { if (rdma_is_kernel_res(e)) {
/* owner = e->kern_name;
* There is no need to call get_task_struct here, } else {
* because we can be here only if there are more /*
* get_task_struct() call than put_task_struct(). * There is no need to call get_task_struct here,
*/ * because we can be here only if there are more
get_task_comm(buf, e->task); * get_task_struct() call than put_task_struct().
owner = buf; */
get_task_comm(buf, e->task);
owner = buf;
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" :
"User",
type2str(e->type), owner);
}
found = true;
} }
xa_destroy(&res->xa[i]);
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" : "User",
type2str(e->type), owner);
} }
pr_err("restrack: %s", CUT_HERE); if (found)
pr_err("restrack: %s", CUT_HERE);
} }
/** /**
...@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, ...@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
{ {
struct rdma_restrack_root *res = &dev->res; struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e; struct rdma_restrack_entry *e;
unsigned long index = 0;
u32 cnt = 0; u32 cnt = 0;
down_read(&res->rwsem); down_read(&res->rwsem);
hash_for_each_possible(res->hash, e, node, type) { xa_for_each(&res->xa[type], index, e) {
if (ns == &init_pid_ns || if (ns == &init_pid_ns ||
(!rdma_is_kernel_res(e) && (!rdma_is_kernel_res(e) &&
ns == task_active_pid_ns(e->task))) ns == task_active_pid_ns(e->task)))
...@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task); ...@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
static void rdma_restrack_add(struct rdma_restrack_entry *res) static void rdma_restrack_add(struct rdma_restrack_entry *res)
{ {
struct ib_device *dev = res_to_dev(res); struct ib_device *dev = res_to_dev(res);
int ret;
if (!dev) if (!dev)
return; return;
kref_init(&res->kref); kref_init(&res->kref);
init_completion(&res->comp); init_completion(&res->comp);
res->valid = true;
down_write(&dev->res.rwsem); down_write(&dev->res.rwsem);
hash_add(dev->res.hash, &res->node, res->type); ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
&dev->res.next_id[res->type]);
if (!ret)
res->valid = true;
up_write(&dev->res.rwsem); up_write(&dev->res.rwsem);
} }
...@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) ...@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
if (!dev) if (!dev)
return; return;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
down_write(&dev->res.rwsem); down_write(&dev->res.rwsem);
hash_del(&res->node); xa_erase(&dev->res.xa[res->type], res->id);
res->valid = false; res->valid = false;
up_write(&dev->res.rwsem); up_write(&dev->res.rwsem);
rdma_restrack_put(res);
wait_for_completion(&res->comp);
out: out:
if (res->task) { if (res->task) {
put_task_struct(res->task); put_task_struct(res->task);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <uapi/rdma/rdma_netlink.h> #include <uapi/rdma/rdma_netlink.h>
#include <linux/xarray.h>
/** /**
* enum rdma_restrack_type - HW objects to track * enum rdma_restrack_type - HW objects to track
...@@ -48,7 +49,6 @@ enum rdma_restrack_type { ...@@ -48,7 +49,6 @@ enum rdma_restrack_type {
RDMA_RESTRACK_MAX RDMA_RESTRACK_MAX
}; };
#define RDMA_RESTRACK_HASH_BITS 8
struct ib_device; struct ib_device;
struct rdma_restrack_entry; struct rdma_restrack_entry;
...@@ -62,9 +62,17 @@ struct rdma_restrack_root { ...@@ -62,9 +62,17 @@ struct rdma_restrack_root {
*/ */
struct rw_semaphore rwsem; struct rw_semaphore rwsem;
/** /**
* @hash: global database for all resources per-device * @xa: Array of XArray structures to hold restrack entries.
* We want to use array of XArrays because insertion is type
* dependent. For types with xisiting unique ID (like QPN),
* we will insert to that unique index. For other types,
* we insert based on pointers and auto-allocate unique index.
*/ */
DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); struct xarray xa[RDMA_RESTRACK_MAX];
/**
* @next_id: Next ID to support cyclic allocation
*/
u32 next_id[RDMA_RESTRACK_MAX];
}; };
/** /**
...@@ -102,10 +110,6 @@ struct rdma_restrack_entry { ...@@ -102,10 +110,6 @@ struct rdma_restrack_entry {
* @kern_name: name of owner for the kernel created entities. * @kern_name: name of owner for the kernel created entities.
*/ */
const char *kern_name; const char *kern_name;
/**
* @node: hash table entry
*/
struct hlist_node node;
/** /**
* @type: various objects in restrack database * @type: various objects in restrack database
*/ */
...@@ -114,6 +118,10 @@ struct rdma_restrack_entry { ...@@ -114,6 +118,10 @@ struct rdma_restrack_entry {
* @user: user resource * @user: user resource
*/ */
bool user; bool user;
/**
* @id: ID to expose to users
*/
u32 id;
}; };
void rdma_restrack_init(struct ib_device *dev); void rdma_restrack_init(struct ib_device *dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment