Commit fd47c2f9 authored by Leon Romanovsky's avatar Leon Romanovsky Committed by Jason Gunthorpe

RDMA/restrack: Convert internal DB from hash to XArray

The additions of .doit callbacks posses new access pattern to the resource
entries by some user visible index. Back then, the legacy DB was
implemented as hash because per-index access wasn't needed and XArray
wasn't accepted yet.

Acceptance of XArray together with per-index access requires the refresh
of DB implementation.
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 5f8f5499
......@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
int start = cb->args[0];
bool has_cap_net_admin;
struct nlmsghdr *nlh;
unsigned long id;
u32 index, port = 0;
bool filled = false;
......@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb,
has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
down_read(&device->res.rwsem);
hash_for_each_possible(device->res.hash, res, node, res_type) {
/*
* FIXME: if the skip ahead is something common this loop should
* use xas_for_each & xas_pause to optimize, we can have a lot of
* objects.
*/
xa_for_each(&device->res.xa[res_type], id, res) {
if (idx < start)
goto next;
......@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
rdma_restrack_put(res);
if (ret == -EMSGSIZE)
/*
* There is a chance to optimize here.
* It can be done by using list_prepare_entry
* and list_for_each_entry_continue afterwards.
*/
break;
if (ret)
goto res_err;
......
......@@ -12,6 +12,28 @@
#include "cma_priv.h"
static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
u32 *next)
{
int err;
*id = *next;
if (*next == U32_MAX)
*id = 0;
xa_lock(xa);
err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
if (err && *next != U32_MAX) {
*id = 0;
err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
}
if (!err)
*next = *id + 1;
xa_unlock(xa);
return err;
}
/**
* rdma_restrack_init() - initialize resource tracking
* @dev: IB device
......@@ -19,6 +41,10 @@
void rdma_restrack_init(struct ib_device *dev)
{
struct rdma_restrack_root *res = &dev->res;
int i;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
init_rwsem(&res->rwsem);
}
......@@ -46,16 +72,19 @@ void rdma_restrack_clean(struct ib_device *dev)
struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e;
char buf[TASK_COMM_LEN];
bool found = false;
const char *owner;
int bkt;
int i;
if (hash_empty(res->hash))
return;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
if (!xa_empty(&res->xa[i])) {
unsigned long index;
dev = container_of(res, struct ib_device, res);
if (!found) {
pr_err("restrack: %s", CUT_HERE);
dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
hash_for_each(res->hash, bkt, e, node) {
}
xa_for_each(&res->xa[i], index, e) {
if (rdma_is_kernel_res(e)) {
owner = e->kern_name;
} else {
......@@ -69,9 +98,15 @@ void rdma_restrack_clean(struct ib_device *dev)
}
pr_err("restrack: %s %s object allocated by %s is not freed\n",
rdma_is_kernel_res(e) ? "Kernel" : "User",
rdma_is_kernel_res(e) ? "Kernel" :
"User",
type2str(e->type), owner);
}
found = true;
}
xa_destroy(&res->xa[i]);
}
if (found)
pr_err("restrack: %s", CUT_HERE);
}
......@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
{
struct rdma_restrack_root *res = &dev->res;
struct rdma_restrack_entry *e;
unsigned long index = 0;
u32 cnt = 0;
down_read(&res->rwsem);
hash_for_each_possible(res->hash, e, node, type) {
xa_for_each(&res->xa[type], index, e) {
if (ns == &init_pid_ns ||
(!rdma_is_kernel_res(e) &&
ns == task_active_pid_ns(e->task)))
......@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
int ret;
if (!dev)
return;
kref_init(&res->kref);
init_completion(&res->comp);
res->valid = true;
down_write(&dev->res.rwsem);
hash_add(dev->res.hash, &res->node, res->type);
ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
&dev->res.next_id[res->type]);
if (!ret)
res->valid = true;
up_write(&dev->res.rwsem);
}
......@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
if (!dev)
return;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
down_write(&dev->res.rwsem);
hash_del(&res->node);
xa_erase(&dev->res.xa[res->type], res->id);
res->valid = false;
up_write(&dev->res.rwsem);
rdma_restrack_put(res);
wait_for_completion(&res->comp);
out:
if (res->task) {
put_task_struct(res->task);
......
......@@ -13,6 +13,7 @@
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <uapi/rdma/rdma_netlink.h>
#include <linux/xarray.h>
/**
* enum rdma_restrack_type - HW objects to track
......@@ -48,7 +49,6 @@ enum rdma_restrack_type {
RDMA_RESTRACK_MAX
};
#define RDMA_RESTRACK_HASH_BITS 8
struct ib_device;
struct rdma_restrack_entry;
......@@ -62,9 +62,17 @@ struct rdma_restrack_root {
*/
struct rw_semaphore rwsem;
/**
* @hash: global database for all resources per-device
* @xa: Array of XArray structures to hold restrack entries.
* We want to use array of XArrays because insertion is type
* dependent. For types with xisiting unique ID (like QPN),
* we will insert to that unique index. For other types,
* we insert based on pointers and auto-allocate unique index.
*/
DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
struct xarray xa[RDMA_RESTRACK_MAX];
/**
* @next_id: Next ID to support cyclic allocation
*/
u32 next_id[RDMA_RESTRACK_MAX];
};
/**
......@@ -102,10 +110,6 @@ struct rdma_restrack_entry {
* @kern_name: name of owner for the kernel created entities.
*/
const char *kern_name;
/**
* @node: hash table entry
*/
struct hlist_node node;
/**
* @type: various objects in restrack database
*/
......@@ -114,6 +118,10 @@ struct rdma_restrack_entry {
* @user: user resource
*/
bool user;
/**
* @id: ID to expose to users
*/
u32 id;
};
void rdma_restrack_init(struct ib_device *dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment