/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>

#include <asm/uaccess.h>

#include <rdma/ib.h>
#include "uverbs.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Fixed char-device numbering for uverbs: major 231 with up to 32
 * minors starting at 192.
 */
enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

/* First dev_t in the range handed to the uverbs cdevs. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

/* sysfs class under which the per-device uverbs nodes are created. */
static struct class *uverbs_class;

/* Protects all of the uverbs IDR tables below. */
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
DEFINE_IDR(ib_uverbs_wq_idr);
DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);

/* Serializes minor-number allocation in dev_map. */
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);

/*
 * Dispatch table for the "classic" (non-extended) uverbs write
 * commands, indexed by IB_USER_VERBS_CMD_* opcode.  Unset slots are
 * NULL and rejected in ib_uverbs_write().
 */
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     struct ib_device *ib_dev,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
};

/*
 * Dispatch table for the extended uverbs commands, indexed by
 * IB_USER_VERBS_EX_CMD_* opcode.  Extended commands carry an extra
 * header and pass core/provider data via ib_udata.
 */
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
				    struct ib_device *ib_dev,
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
	[IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
	[IB_USER_VERBS_EX_CMD_CREATE_WQ]        = ib_uverbs_ex_create_wq,
	[IB_USER_VERBS_EX_CMD_MODIFY_WQ]        = ib_uverbs_ex_modify_wq,
	[IB_USER_VERBS_EX_CMD_DESTROY_WQ]       = ib_uverbs_ex_destroy_wq,
	[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
	[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
};

/* IB core client callbacks, run when a device is registered/removed. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

145 146 147 148 149 150 151 152 153 154 155
/*
 * Destroy a memory window via its device's dealloc_mw method and, on
 * success, drop the reference the MW held on its protection domain.
 * Returns the driver's error code unchanged on failure.
 */
int uverbs_dealloc_mw(struct ib_mw *mw)
{
	/* Grab the PD first: the MW is gone once dealloc_mw succeeds. */
	struct ib_pd *pd = mw->pd;
	int err = mw->device->dealloc_mw(mw);

	if (err)
		return err;

	atomic_dec(&pd->usecnt);
	return 0;
}

156
/*
 * kobject release callback for an ib_uverbs_device: runs once the last
 * kobject reference is dropped, so it is safe to tear down the SRCU
 * state and free the structure.
 */
static void ib_uverbs_release_dev(struct kobject *kobj)
{
	struct ib_uverbs_device *dev =
		container_of(kobj, struct ib_uverbs_device, kobj);

	cleanup_srcu_struct(&dev->disassociate_srcu);
	kfree(dev);
}

static struct kobj_type ib_uverbs_dev_ktype = {
	.release = ib_uverbs_release_dev,
};

169 170 171 172 173 174 175 176
/* kref release callback: frees an event file once its last ref is gone. */
static void ib_uverbs_release_event_file(struct kref *ref)
{
	struct ib_uverbs_event_file *file =
		container_of(ref, struct ib_uverbs_event_file, ref);

	kfree(file);
}

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
/*
 * Discard all events still queued for a CQ uobject: completion events
 * on its (optional) completion channel and async events on the file's
 * async event file.  Also drops the reference the CQ held on its
 * completion channel.
 */
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->lock);

		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	}

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

/*
 * Discard any async events still queued for a QP/SRQ/WQ uobject on the
 * file's async event file before the uobject is freed.
 */
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

215 216 217 218 219 220 221 222 223 224 225 226
/*
 * Detach and free every multicast group still attached to a userspace
 * QP object, undoing earlier ib_attach_mcast() calls.
 */
static void ib_uverbs_detach_umcast(struct ib_qp *qp,
				    struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *entry, *next;

	list_for_each_entry_safe(entry, next, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &entry->gid, entry->lid);
		list_del(&entry->list);
		kfree(entry);
	}
}

227 228
/*
 * Tear down every object still owned by a user context when the file
 * is closed (or the device disassociated).  Objects are destroyed in
 * dependency order: AHs, MWs, flows, QPs, RWQ indirection tables, WQs,
 * SRQs, CQs, MRs, XRCDs and finally PDs, so that nothing is destroyed
 * while another live object still references it.
 *
 * Returns the result of the driver's dealloc_ucontext method.
 */
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
{
	struct ib_uobject *uobj, *tmp;

	/* Tell e.g. the umem release path that teardown is in progress. */
	context->closing = 1;

	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
		ib_destroy_ah(ah);
		kfree(uobj);
	}

	/* Remove MWs before QPs, in order to support type 2A MWs. */
	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
		struct ib_mw *mw = uobj->object;

		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
		uverbs_dealloc_mw(mw);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
		struct ib_flow *flow_id = uobj->object;

		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
		ib_destroy_flow(flow_id);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
		struct ib_qp *qp = uobj->object;
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
		/* Shared (XRC target) QPs have no mcast attachments of
		 * their own; only detach on the real QP. */
		if (qp == qp->real_qp)
			ib_uverbs_detach_umcast(qp, uqp);
		ib_destroy_qp(qp);
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
	}

	list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
		struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
		struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;

		idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
		ib_destroy_rwq_ind_table(rwq_ind_tbl);
		kfree(ind_tbl);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
		struct ib_wq *wq = uobj->object;
		struct ib_uwq_object *uwq =
			container_of(uobj, struct ib_uwq_object, uevent.uobject);

		idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
		ib_destroy_wq(wq);
		ib_uverbs_release_uevent(file, &uwq->uevent);
		kfree(uwq);
	}

	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
		struct ib_srq *srq = uobj->object;
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
		ib_destroy_srq(srq);
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
	}

	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
		struct ib_cq *cq = uobj->object;
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
		ib_destroy_cq(cq);
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
	}

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
		struct ib_mr *mr = uobj->object;

		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
		ib_dereg_mr(mr);
		kfree(uobj);
	}

	/* xrcd_tree_mutex protects the per-device XRCD tree. */
	mutex_lock(&file->device->xrcd_tree_mutex);
	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
		struct ib_xrcd *xrcd = uobj->object;
		struct ib_uxrcd_object *uxrcd =
			container_of(uobj, struct ib_uxrcd_object, uobject);

		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
		ib_uverbs_dealloc_xrcd(file->device, xrcd);
		kfree(uxrcd);
	}
	mutex_unlock(&file->device->xrcd_tree_mutex);

	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

	put_pid(context->tgid);

	return context->device->dealloc_ucontext(context);
}

349 350 351 352 353
/*
 * Wake whoever is waiting on dev->comp; callers invoke this when the
 * device refcount drops to zero.
 */
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

354 355 356 357
/*
 * kref release callback for an ib_uverbs_file.  Drops the module
 * reference taken at open time (only held when the driver cannot
 * disassociate ucontexts), releases our device refcount, and frees
 * the file.
 */
static void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	/* ib_dev may already be disassociated; dereference under SRCU. */
	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	kfree(file);
}

/*
 * read() on a completion/async event file: dequeue one event and copy
 * it to userspace.  Blocks (unless O_NONBLOCK) until an event arrives
 * or the underlying device is disassociated, in which case -EIO is
 * returned once the queue is drained.
 */
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *event;
	int eventsz;
	int ret = 0;

	spin_lock_irq(&file->lock);

	while (list_empty(&file->event_list)) {
		/* Must not sleep holding the lock; re-taken below. */
		spin_unlock_irq(&file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     (!list_empty(&file->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarentee this will see the null set
			 * without using RCU
			 */
					     !file->uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If device was disassociated and no event exists set an error */
		if (list_empty(&file->event_list) &&
		    !file->uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&file->lock);
	}

	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);

	/* Async and completion events have different on-the-wire sizes. */
	if (file->is_async)
		eventsz = sizeof (struct ib_uverbs_async_event_desc);
	else
		eventsz = sizeof (struct ib_uverbs_comp_event_desc);

	if (eventsz > count) {
		/* Caller's buffer is too small; leave the event queued. */
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(file->event_list.next);
		if (event->counter) {
			/* Record delivery so destroy can report it. */
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&file->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	/* kfree(NULL) is a no-op for the too-small-buffer case. */
	kfree(event);

	return ret;
}

static unsigned int ib_uverbs_event_poll(struct file *filp,
					 struct poll_table_struct *wait)
{
	unsigned int pollflags = 0;
	struct ib_uverbs_event_file *file = filp->private_data;

	poll_wait(filp, &file->poll_wait, wait);

	spin_lock_irq(&file->lock);
448
	if (!list_empty(&file->event_list))
449 450 451 452 453 454
		pollflags = POLLIN | POLLRDNORM;
	spin_unlock_irq(&file->lock);

	return pollflags;
}

455 456 457 458 459 460 461
/* fasync support so SIGIO is delivered when events are queued
 * (see the kill_fasync() calls in the event handlers below). */
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_file *file = filp->private_data;

	return fasync_helper(fd, filp, on, &file->async_queue);
}

462 463 464
/*
 * release() for an event file: mark it closed, discard all queued
 * events, unlink it from the device's list (unless disassociation
 * already did so), unregister the async event handler for async
 * files, and drop the refs taken at creation time.
 */
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	/* lists_mutex orders us against device disassociation. */
	mutex_lock(&file->uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->lock);
	closed_already = file->is_closed;
	file->is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->lock);
	if (!closed_already) {
		list_del(&file->list);
		if (file->is_async)
			ib_unregister_event_handler(&file->uverbs_file->
				event_handler);
	}
	mutex_unlock(&file->uverbs_file->device->lists_mutex);

	kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_event_file);

	return 0;
}

/* File operations for the anon-inode event files created by
 * ib_uverbs_alloc_event_file(). */
static const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_event_read,
	.poll    = ib_uverbs_event_poll,
	.release = ib_uverbs_event_close,
	.fasync  = ib_uverbs_event_fasync,
	.llseek	 = no_llseek,
};

/*
 * CQ completion callback: queue a completion event on the CQ's
 * completion channel and wake readers.  May run in interrupt context,
 * hence GFP_ATOMIC and the irqsave locking; events are silently
 * dropped if the channel is closed or allocation fails.
 */
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_file    *file = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	/* CQs without a completion channel have a NULL cq_context. */
	if (!file)
		return;

	spin_lock_irqsave(&file->lock, flags);
	if (file->is_closed) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &file->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&file->lock, flags);

	wake_up_interruptible(&file->poll_wait);
	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}

/*
 * Queue one async event (element handle + event type) on the file's
 * async event file and wake readers.  obj_list/counter, when non-NULL,
 * link the event to its owning uobject so delivery can be counted at
 * destroy time.  Uses GFP_ATOMIC since callers may be in IRQ context;
 * events are dropped if the file is closed or allocation fails.
 */
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}

/* Async CQ event callback: forward the event to the owning file's
 * async event queue, tagged with the CQ's user handle. */
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

/* Async QP event callback: forward to the file's async event queue,
 * skipping XRC target QPs whose uobject is not (yet) live. */
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject || !event->element.qp->uobject->live)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

Yishai Hadas's avatar
Yishai Hadas committed
596 597 598 599 600 601 602 603 604 605
/* Async WQ event callback: forward to the file's async event queue. */
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
						  struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

606 607
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
608 609 610 611 612 613 614 615
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
616 617
}

618 619
/* Device-level async event callback (port events etc.): queue the
 * event with the port number as element and no owning uobject. */
void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

628 629 630 631 632 633
/* Drop the file's reference on its async event file and clear the
 * pointer so no further async events are queued through it. */
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
	file->async_file = NULL;
}

634
/*
 * Create an anon-inode event file (completion channel, or the single
 * async event file when is_async is set) bound to uverbs_file.  For
 * async files this also registers the device-level event handler.
 *
 * Returns the new struct file or an ERR_PTR; on error all refs taken
 * here are dropped again.
 */
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
					struct ib_device	*ib_dev,
					int is_async)
{
	struct ib_uverbs_event_file *ev_file;
	struct file *filp;
	int ret;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	kref_init(&ev_file->ref);
	spin_lock_init(&ev_file->lock);
	INIT_LIST_HEAD(&ev_file->event_list);
	init_waitqueue_head(&ev_file->poll_wait);
	ev_file->uverbs_file = uverbs_file;
	/* The event file keeps its owning uverbs file alive. */
	kref_get(&ev_file->uverbs_file->ref);
	ev_file->async_queue = NULL;
	ev_file->is_closed   = 0;

	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	/* Track the file so disassociation can find and close it. */
	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	if (is_async) {
		WARN_ON(uverbs_file->async_file);
		uverbs_file->async_file = ev_file;
		kref_get(&uverbs_file->async_file->ref);
		INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
				      ib_dev,
				      ib_uverbs_event_handler);
		ret = ib_register_event_handler(&uverbs_file->event_handler);
		if (ret)
			goto err_put_file;

		/* At that point async file stuff was fully set */
		ev_file->is_async = 1;
	}

	return filp;

err_put_file:
	fput(filp);
	kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
	uverbs_file->async_file = NULL;
	return ERR_PTR(ret);

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	/* filp is the ERR_PTR from anon_inode_getfile() here. */
	return filp;
}

/*
 * Look up a completion event file by FD.  If lookup is successful,
 * takes a ref to the event file struct that it returns; if
 * unsuccessful, returns NULL.
 */
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
	struct ib_uverbs_event_file *ev_file = NULL;
	struct fd f = fdget(fd);

	if (!f.file)
		return NULL;

	/* Reject FDs that are not uverbs event files at all. */
	if (f.file->f_op != &uverbs_event_fops)
		goto out;

	ev_file = f.file->private_data;
	/* Only completion channels are valid here, not async files. */
	if (ev_file->is_async) {
		ev_file = NULL;
		goto out;
	}

	kref_get(&ev_file->ref);

out:
	fdput(f);
	return ev_file;
}

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
/*
 * Check whether the device advertises support for a uverbs command.
 * Classic commands (up to OPEN_QP) are gated by uverbs_cmd_mask,
 * extended ones by uverbs_ex_cmd_mask.  Returns 0 if supported,
 * -1 otherwise.
 */
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
	u64 mask = (command <= IB_USER_VERBS_CMD_OPEN_QP) ?
		ib_dev->uverbs_cmd_mask : ib_dev->uverbs_ex_cmd_mask;

	if (mask & ((u64)1 << command))
		return 0;

	return -1;
}

738 739 740 741
/*
 * write() entry point for /dev/infiniband/uverbsN: parse the command
 * header, validate it against the device's capability masks and the
 * file state, then dispatch to uverbs_cmd_table (classic commands) or
 * uverbs_ex_cmd_table (extended commands).  The whole call runs under
 * an SRCU read lock so a concurrent device disassociation cannot free
 * ib_dev from under us.
 */
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	struct ib_uverbs_cmd_hdr hdr;
	__u32 command;
	__u32 flags;
	int srcu_key;
	ssize_t ret;

	/* Reject writes arriving via unexpected syscall paths. */
	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
		return -EACCES;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		/* Device already disassociated. */
		ret = -EIO;
		goto out;
	}

	/* Only flag and command bits may be set in hdr.command. */
	if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
				   IB_USER_VERBS_CMD_COMMAND_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
	if (verify_command_mask(ib_dev, command)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/* Everything except GET_CONTEXT needs a context first. */
	if (!file->ucontext &&
	    command != IB_USER_VERBS_CMD_GET_CONTEXT) {
		ret = -EINVAL;
		goto out;
	}

	flags = (hdr.command &
		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;

	if (!flags) {
		/* Classic command: payload length must match the header. */
		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
		    !uverbs_cmd_table[command]) {
			ret = -EINVAL;
			goto out;
		}

		if (hdr.in_words * 4 != count) {
			ret = -EINVAL;
			goto out;
		}

		ret = uverbs_cmd_table[command](file, ib_dev,
						 buf + sizeof(hdr),
						 hdr.in_words * 4,
						 hdr.out_words * 4);

	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
		struct ib_uverbs_ex_cmd_hdr ex_hdr;
		struct ib_udata ucore;
		struct ib_udata uhw;
		size_t written_count = count;

		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
		    !uverbs_ex_cmd_table[command]) {
			ret = -ENOSYS;
			goto out;
		}

		if (!file->ucontext) {
			ret = -EINVAL;
			goto out;
		}

		if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
			ret = -EINVAL;
			goto out;
		}

		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
			ret = -EFAULT;
			goto out;
		}

		count -= sizeof(hdr) + sizeof(ex_hdr);
		buf += sizeof(hdr) + sizeof(ex_hdr);

		/* Extended commands count payload in 8-byte words. */
		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
			ret = -EINVAL;
			goto out;
		}

		if (ex_hdr.cmd_hdr_reserved) {
			ret = -EINVAL;
			goto out;
		}

		if (ex_hdr.response) {
			/* A response pointer requires some output words,
			 * and the buffer must be writable. */
			if (!hdr.out_words && !ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}

			if (!access_ok(VERIFY_WRITE,
				       (void __user *) (unsigned long) ex_hdr.response,
				       (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (hdr.out_words || ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}
		}

		/* Split the payload into core and provider udata. */
		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
				       hdr.in_words * 8, hdr.out_words * 8);

		INIT_UDATA_BUF_OR_NULL(&uhw,
				       buf + ucore.inlen,
				       (unsigned long) ex_hdr.response + ucore.outlen,
				       ex_hdr.provider_in_words * 8,
				       ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file,
						   ib_dev,
						   &ucore,
						   &uhw);
		if (!ret)
			ret = written_count;
	} else {
		ret = -ENOSYS;
	}

out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * mmap() entry point: delegate to the driver's mmap method under the
 * disassociate SRCU lock.  Fails with -EIO if the device is gone and
 * -ENODEV if no user context was created yet.
 */
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (!file->ucontext)
		ret = -ENODEV;
	else
		ret = ib_dev->mmap(file->ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

911 912 913
/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately run -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	/* Refuse to open a device that is already being removed. */
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		/* Only drop the module ref if we actually took one. */
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	file->ucontext	 = NULL;
	file->async_file = NULL;
	kref_init(&file->ref);
	mutex_init(&file->mutex);
	mutex_init(&file->cleanup_mutex);

	filp->private_data = file;
	/* The open file pins the device kobject. */
	kobject_get(&dev->kobj);
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return nonseekable_open(inode, filp);

err_module:
	module_put(ib_dev->owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	return ret;
}

/*
 * release() for a uverbs file: destroy the user context (unless a
 * disassociation already did), unlink the file from the device list,
 * and drop the async-file, file and device kobject references.
 */
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_device *dev = file->device;

	/* cleanup_mutex serializes against device disassociation. */
	mutex_lock(&file->cleanup_mutex);
	if (file->ucontext) {
		ib_uverbs_cleanup_ucontext(file, file->ucontext);
		file->ucontext = NULL;
	}
	mutex_unlock(&file->cleanup_mutex);

	mutex_lock(&file->device->lists_mutex);
	if (!file->is_closed) {
		list_del(&file->list);
		file->is_closed = 1;
	}
	mutex_unlock(&file->device->lists_mutex);

	if (file->async_file)
		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);
	kobject_put(&dev->kobj);

	return 0;
}

/*
 * File operations used for uverbs char devices whose underlying ib_device
 * does not provide an mmap method (contrast with uverbs_mmap_fops below).
 */
static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

/*
 * File operations for uverbs char devices backed by an ib_device that
 * supports mmap; identical to uverbs_fops plus the .mmap handler.
 * ib_uverbs_add_one() picks between the two based on device->mmap.
 */
static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

/*
 * IB core client registration: add/remove callbacks are invoked for every
 * IB device that appears or disappears while this module is loaded.
 */
static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

1042 1043
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
			  char *buf)
1044
{
1045 1046
	int ret = -ENODEV;
	int srcu_key;
1047
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1048
	struct ib_device *ib_dev;
1049 1050 1051

	if (!dev)
		return -ENODEV;
1052

1053 1054 1055 1056 1057 1058 1059
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", ib_dev->name);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
1060
}
1061
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1062

1063 1064
static ssize_t show_dev_abi_version(struct device *device,
				    struct device_attribute *attr, char *buf)
1065
{
1066
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1067 1068 1069
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;
1070 1071 1072

	if (!dev)
		return -ENODEV;
1073 1074 1075 1076 1077
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1078

1079
	return ret;
1080
}
1081
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1082

/* Class-wide sysfs attribute exposing the overall uverbs ABI version. */
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

/* Dynamically allocated major for devices beyond IB_UVERBS_MAX_DEVICES. */
static dev_t overflow_maj;
/* Tracks which minors of the overflow major are in use. */
static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);

/*
 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
 * requesting a new major number and doubling the number of max devices we
 * support. It's stupid, but simple.
 *
 * Returns a free index into overflow_map, a negative errno if the dynamic
 * region could not be registered, or -1 if the overflow range is also full.
 *
 * Must be called without map_lock held: alloc_chrdev_region() may sleep
 * (the caller in ib_uverbs_add_one() drops the spinlock first).
 * NOTE(review): the bitmap scan here and the caller's set_bit() happen
 * under separate map_lock critical sections — presumably add_one callers
 * are serialized higher up; verify there is no devnum-allocation race.
 */
static int find_overflow_devnum(void)
{
	int ret;

	/* Lazily register the overflow char-dev region on first use. */
	if (!overflow_maj) {
		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
					  "infiniband_verbs");
		if (ret) {
			pr_err("user_verbs: couldn't register dynamic device number\n");
			return ret;
		}
	}

	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
	if (ret >= IB_UVERBS_MAX_DEVICES)
		return -1;

	return ret;
}

/*
 * ib_uverbs_add_one() - IB client "add" callback; create the uverbs char
 * device (/dev/infiniband/uverbsN) and sysfs nodes for a new IB device.
 *
 * Allocation/registration order: uverbs_device + SRCU, device number
 * (primary range, then overflow range), cdev, class device, sysfs files,
 * and finally client data.  Failures unwind in reverse via the goto
 * labels.  On any failure the function returns silently; the IB core
 * add callback has no return value.
 */
static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	/* Devices without ucontext support cannot be driven from userspace. */
	if (!device->alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	/* refcount 1 == "device registered"; dropped in remove_one/err path. */
	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);

	/* Claim a device number: primary range first, overflow range second. */
	spin_lock(&map_lock);
	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (devnum >= IB_UVERBS_MAX_DEVICES) {
		/* Drop the spinlock: find_overflow_devnum() may sleep. */
		spin_unlock(&map_lock);
		devnum = find_overflow_devnum();
		if (devnum < 0)
			goto err;

		spin_lock(&map_lock);
		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
		base = devnum + overflow_maj;
		set_bit(devnum, overflow_map);
	} else {
		uverbs_dev->devnum = devnum;
		base = devnum + IB_UVERBS_BASE_DEV;
		set_bit(devnum, dev_map);
	}
	spin_unlock(&map_lock);

	/* Published under RCU so disassociation can NULL it out safely. */
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	cdev_init(&uverbs_dev->cdev, NULL);
	uverbs_dev->cdev.owner = THIS_MODULE;
	/* Pick fops with or without .mmap depending on driver support. */
	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
	uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
	if (cdev_add(&uverbs_dev->cdev, base, 1))
		goto err_cdev;

	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
					uverbs_dev->cdev.dev, uverbs_dev,
					"uverbs%d", uverbs_dev->devnum);
	if (IS_ERR(uverbs_dev->dev))
		goto err_cdev;

	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
		goto err_class;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);

err_cdev:
	cdev_del(&uverbs_dev->cdev);
	/* devnum is still the index within whichever bitmap we claimed. */
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(devnum, dev_map);
	else
		clear_bit(devnum, overflow_map);

err:
	/* Drop the registration ref, wait for release, free via kobject. */
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	kobject_put(&uverbs_dev->kobj);
	return;
}

/*
 * ib_uverbs_free_hw_resources() - forcibly disassociate every open file
 * from a disappearing IB device.
 *
 * Called from ib_uverbs_remove_one() when the driver supports
 * disassociate_ucontext.  For each open uverbs file: deliver a
 * DEVICE_FATAL event, steal its ucontext under cleanup_mutex (so
 * ib_uverbs_close() cannot clean it up concurrently), then disassociate
 * and destroy the ucontext.  Afterwards every event file is marked
 * closed and its waiters are woken so userspace sees EIO/POLLIN.
 *
 * lists_mutex is dropped around the per-file work because
 * disassociate_ucontext may re-enter uverbs_close (see comment below).
 */
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_event_file *event_file;
	struct ib_event event;

	/* Pending running commands to terminate */
	synchronize_srcu(&uverbs_dev->disassociate_srcu);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		struct ib_ucontext *ucontext;
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		/* Mark closed so ib_uverbs_close() won't list_del() again. */
		file->is_closed = 1;
		list_del(&file->list);
		/* Hold the file across the unlocked section below. */
		kref_get(&file->ref);
		mutex_unlock(&uverbs_dev->lists_mutex);

		ib_uverbs_event_handler(&file->event_handler, &event);

		mutex_lock(&file->cleanup_mutex);
		ucontext = file->ucontext;
		file->ucontext = NULL;
		mutex_unlock(&file->cleanup_mutex);

		/* At this point ib_uverbs_close cannot be running
		 * ib_uverbs_cleanup_ucontext
		 */
		if (ucontext) {
			/* We must release the mutex before going ahead and
			 * calling disassociate_ucontext. disassociate_ucontext
			 * might end up indirectly calling uverbs_close,
			 * for example due to freeing the resources
			 * (e.g mmput).
			 */
			ib_dev->disassociate_ucontext(ucontext);
			ib_uverbs_cleanup_ucontext(file, ucontext);
		}

		mutex_lock(&uverbs_dev->lists_mutex);
		kref_put(&file->ref, ib_uverbs_release_file);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_event_file,
					      list);
		spin_lock_irq(&event_file->lock);
		event_file->is_closed = 1;
		spin_unlock_irq(&event_file->lock);

		list_del(&event_file->list);
		/* Async event files own an event handler on the ib_device;
		 * unregister it before the device goes away.
		 */
		if (event_file->is_async) {
			ib_unregister_event_handler(&event_file->uverbs_file->
						    event_handler);
			event_file->uverbs_file->event_handler.device = NULL;
		}

		/* Wake poll()ers and fasync listeners so they notice EOF. */
		wake_up_interruptible(&event_file->poll_wait);
		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);
}

/*
 * ib_uverbs_remove_one() - IB client "remove" callback; undo
 * ib_uverbs_add_one() for a departing IB device.
 *
 * Destroys the sysfs/class device and cdev (so no new opens can happen),
 * releases the device number, then either:
 *  - disassociates all open files immediately (driver supports
 *    disassociate_ucontext) and returns without waiting, or
 *  - drops the registration ref and blocks until the last open file
 *    closes (wait_clients path), since contexts then depend on the
 *    low-level device staying alive.
 */
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	/* Makes the sysfs show functions start returning -ENODEV. */
	dev_set_drvdata(uverbs_dev->dev, NULL);
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);

	/* Return the device number to whichever bitmap it came from. */
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(uverbs_dev->devnum, dev_map);
	else
		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);

	if (device->disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see a EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	/* Drop the registration reference taken in ib_uverbs_add_one(). */
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);
	/* Final kobject ref; frees uverbs_dev once everyone is done. */
	kobject_put(&uverbs_dev->kobj);
}

1317
static char *uverbs_devnode(struct device *dev, umode_t *mode)
1318
{
1319 1320
	if (mode)
		*mode = 0666;
1321 1322 1323
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

1324 1325 1326 1327 1328 1329 1330
static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
				     "infiniband_verbs");
	if (ret) {
1331
		pr_err("user_verbs: couldn't register device number\n");
1332 1333 1334
		goto out;
	}

1335 1336 1337
	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
1338
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
1339 1340 1341
		goto out_chrdev;
	}

1342 1343
	uverbs_class->devnode = uverbs_devnode;

1344
	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1345
	if (ret) {
1346
		pr_err("user_verbs: couldn't create abi_version attribute\n");
1347 1348 1349 1350 1351
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
1352
		pr_err("user_verbs: couldn't register client\n");
1353
		goto out_class;
1354 1355 1356 1357 1358
	}

	return 0;

out_class:
1359
	class_destroy(uverbs_class);
1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370

out_chrdev:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);

out:
	return ret;
}

/*
 * Module exit: tear everything down in the reverse order of
 * ib_uverbs_init(), then release the lazily-allocated overflow region
 * and the uobject idr tables.
 */
static void __exit ib_uverbs_cleanup(void)
{
	struct idr *idrs[] = {
		&ib_uverbs_pd_idr, &ib_uverbs_mr_idr, &ib_uverbs_mw_idr,
		&ib_uverbs_ah_idr, &ib_uverbs_cq_idr, &ib_uverbs_qp_idr,
		&ib_uverbs_srq_idr,
	};
	unsigned int i;

	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	/* The overflow major only exists if find_overflow_devnum() ran. */
	if (overflow_maj)
		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);

	for (i = 0; i < ARRAY_SIZE(idrs); i++)
		idr_destroy(idrs[i]);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);