Commit e951747a authored by Jason Gunthorpe's avatar Jason Gunthorpe

IB/uverbs: Rework the locking for cleaning up the ucontext

The locking here has always been a bit crazy and spread out, upon some
careful analysis we can simplify things.

Create a single function uverbs_destroy_ufile_hw() that internally handles
all locking. This pulls together pieces of this process that were
sprinkled all over the places into one place, and covers them with one
lock.

This eliminates several duplicate/confusing locks and makes the control
flow in ib_uverbs_close() and ib_uverbs_free_hw_resources() extremely
simple.

Unfortunately we have to keep an extra mutex, ucontext_lock.  This lock is
logically part of the rwsem and provides the 'down write, fail if write
locked, wait if read locked' semantic we require.
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 87064277
......@@ -32,6 +32,7 @@
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
......@@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
}
ret = uverbs_try_lock_object(uobj, exclusive);
if (ret) {
WARN(uobj->ufile->cleanup_reason,
"ib_uverbs: Trying to lookup_get while cleanup context\n");
if (ret)
goto free;
}
return uobj;
free:
......@@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f)
uverbs_uobject_put(uobj);
}
static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
struct ib_device *ib_dev = ibcontext->device;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
if (!owning_process)
return;
owning_mm = get_task_mm(owning_process);
if (!owning_mm) {
pr_info("no mm, disassociate ucontext is pending task termination\n");
while (1) {
put_task_struct(owning_process);
usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
owning_process->state == TASK_DEAD) {
pr_info("disassociate ucontext done, task was terminated\n");
/* in case task was dead need to release the
* task struct.
*/
if (owning_process)
put_task_struct(owning_process);
return;
}
}
}
down_write(&owning_mm->mmap_sem);
ib_dev->disassociate_ucontext(ibcontext);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
/*
* Drop the ucontext off the ufile and completely disconnect it from the
* ib_device
*/
static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
struct ib_ucontext *ucontext = ufile->ucontext;
int ret;
if (reason == RDMA_REMOVE_DRIVER_REMOVE)
ufile_disassociate_ucontext(ucontext);
put_pid(ucontext->tgid);
ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
RDMACG_RESOURCE_HCA_HANDLE);
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
ret = ucontext->device->dealloc_ucontext(ufile->ucontext);
WARN_ON(ret);
ufile->ucontext = NULL;
}
static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
......@@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
* We take and release the lock per traversal in order to let
* other threads (which might still use the FDs) chance to run.
*/
ufile->cleanup_reason = reason;
list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
/*
* if we hit this WARN_ON, that means we are
......@@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
return ret;
}
void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
/*
* Destroy the uncontext and every uobject associated with it. If called with
* reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
* been completed and ufile->ucontext is NULL.
*
* This is internally locked and can be called in parallel from multiple
* contexts.
*/
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
enum rdma_remove_reason reason = device_removed ?
RDMA_REMOVE_DRIVER_REMOVE :
RDMA_REMOVE_CLOSE;
if (reason == RDMA_REMOVE_CLOSE) {
/*
* During destruction we might trigger something that
* synchronously calls release on any file descriptor. For
* this reason all paths that come from file_operations
* release must use try_lock. They can progress knowing that
* there is an ongoing uverbs_destroy_ufile_hw that will clean
* up the driver resources.
*/
if (!mutex_trylock(&ufile->ucontext_lock))
return;
} else {
mutex_lock(&ufile->ucontext_lock);
}
down_write(&ufile->hw_destroy_rwsem);
/*
* Waits for all remove_commit and alloc_commit to finish. Logically, We
* want to hold this forever as the context is going to be destroyed,
* but we'll release it since it causes a "held lock freed" BUG message.
* If a ucontext was never created then we can't have any uobjects to
* cleanup, nothing to do.
*/
down_write(&ufile->hw_destroy_rwsem);
if (!ufile->ucontext)
goto done;
ufile->ucontext->closing = true;
ufile->ucontext->cleanup_retryable = true;
while (!list_empty(&ufile->uobjects))
if (__uverbs_cleanup_ufile(ufile, reason)) {
......@@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
if (!list_empty(&ufile->uobjects))
__uverbs_cleanup_ufile(ufile, reason);
ufile_destroy_ucontext(ufile, reason);
done:
up_write(&ufile->hw_destroy_rwsem);
mutex_unlock(&ufile->ucontext_lock);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
......
......@@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
uint16_t method);
void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed);
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason);
/*
* uverbs_uobject_get is called in order to increase the reference count on
......
......@@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file {
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
struct mutex cleanup_mutex; /* protect cleanup */
struct ib_uverbs_device *device;
/* Protects writing to ucontext */
struct mutex ucontext_lock;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_async_event_file *async_file;
......@@ -155,8 +155,6 @@ struct ib_uverbs_file {
spinlock_t uobjects_lock;
struct list_head uobjects;
enum rdma_remove_reason cleanup_reason;
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
......
......@@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
mutex_lock(&file->mutex);
mutex_lock(&file->ucontext_lock);
if (file->ucontext) {
ret = -EINVAL;
......@@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
fd_install(resp.async_fd, filp);
mutex_unlock(&file->mutex);
mutex_unlock(&file->ucontext_lock);
return in_len;
......@@ -169,7 +169,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
err:
mutex_unlock(&file->mutex);
mutex_unlock(&file->ucontext_lock);
return ret;
}
......
......@@ -41,8 +41,6 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
......@@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
}
static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
bool device_removed)
{
struct ib_ucontext *context = file->ucontext;
context->closing = 1;
uverbs_cleanup_ufile(file, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
RDMACG_RESOURCE_HCA_HANDLE);
return context->device->dealloc_ucontext(context);
}
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
......@@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
spin_lock_init(&file->idr_lock);
idr_init(&file->idr);
kref_init(&file->ref);
mutex_init(&file->mutex);
mutex_init(&file->cleanup_mutex);
mutex_init(&file->ucontext_lock);
spin_lock_init(&file->uobjects_lock);
INIT_LIST_HEAD(&file->uobjects);
......@@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
ib_uverbs_cleanup_ufile(file, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
idr_destroy(&file->idr);
mutex_lock(&file->device->lists_mutex);
......@@ -1109,44 +1086,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
return;
}
static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
struct ib_device *ib_dev = ibcontext->device;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
if (!owning_process)
return;
owning_mm = get_task_mm(owning_process);
if (!owning_mm) {
pr_info("no mm, disassociate ucontext is pending task termination\n");
while (1) {
put_task_struct(owning_process);
usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
owning_process->state == TASK_DEAD) {
pr_info("disassociate ucontext done, task was terminated\n");
/* in case task was dead need to release the
* task struct.
*/
if (owning_process)
put_task_struct(owning_process);
return;
}
}
}
down_write(&owning_mm->mmap_sem);
ib_dev->disassociate_ucontext(ibcontext);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
......@@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
list_del(&file->list);
kref_get(&file->ref);
mutex_unlock(&uverbs_dev->lists_mutex);
mutex_lock(&file->cleanup_mutex);
ucontext = file->ucontext;
file->ucontext = NULL;
mutex_unlock(&file->cleanup_mutex);
/* At this point ib_uverbs_close cannot be running
* ib_uverbs_cleanup_ufile
/* We must release the mutex before going ahead and calling
* uverbs_cleanup_ufile, as it might end up indirectly calling
* uverbs_close, for example due to freeing the resources (e.g
* mmput).
*/
if (ucontext) {
/* We must release the mutex before going ahead and
* calling disassociate_ucontext. disassociate_ucontext
* might end up indirectly calling uverbs_close,
* for example due to freeing the resources
* (e.g mmput).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
ib_uverbs_disassociate_ucontext(ucontext);
mutex_lock(&file->cleanup_mutex);
ib_uverbs_cleanup_ufile(file, true);
mutex_unlock(&file->cleanup_mutex);
}
mutex_unlock(&uverbs_dev->lists_mutex);
mutex_lock(&uverbs_dev->lists_mutex);
ib_uverbs_event_handler(&file->event_handler, &event);
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
mutex_lock(&uverbs_dev->lists_mutex);
}
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
......
......@@ -1479,6 +1479,11 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
/*
* 'closing' can be read by the driver only during a destroy callback,
* it is set when we are closing the file descriptor and indicates
* that mm_sem may be locked.
*/
int closing;
bool cleanup_retryable;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment