// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1999 Eric Youngdale
 * Copyright (C) 2014 Christoph Hellwig
 *
 *  SCSI queueing library.
 *      Initial versions: Eric Youngdale (eric@andante.org).
 *                        Based upon conversations with large numbers
 *                        of people at Linux Expo.
 */

#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/scatterlist.h>
#include <linux/blk-mq.h>
#include <linux/ratelimit.h>
#include <asm/unaligned.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_transport.h> /* __scsi_init_queue() */
#include <scsi/scsi_dh.h>

#include <trace/events/scsi.h>

#include "scsi_debugfs.h"
#include "scsi_priv.h"
#include "scsi_logging.h"

/*
 * Size of integrity metadata is usually small, 1 inline sg should
 * cover normal cases.
 */
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define  SCSI_INLINE_PROT_SG_CNT  0
#define  SCSI_INLINE_SG_CNT  0
#else
#define  SCSI_INLINE_PROT_SG_CNT  1
#define  SCSI_INLINE_SG_CNT  2
#endif

static struct kmem_cache *scsi_sense_cache;
static struct kmem_cache *scsi_sense_isadma_cache;
static DEFINE_MUTEX(scsi_sense_cache_mutex);

static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd);

static inline struct kmem_cache *
scsi_select_sense_cache(bool unchecked_isa_dma)
{
	return unchecked_isa_dma ? scsi_sense_isadma_cache : scsi_sense_cache;
}

static void scsi_free_sense_buffer(bool unchecked_isa_dma,
				   unsigned char *sense_buffer)
{
	kmem_cache_free(scsi_select_sense_cache(unchecked_isa_dma),
			sense_buffer);
}

static unsigned char *scsi_alloc_sense_buffer(bool unchecked_isa_dma,
	gfp_t gfp_mask, int numa_node)
{
	return kmem_cache_alloc_node(scsi_select_sense_cache(unchecked_isa_dma),
				     gfp_mask, numa_node);
}

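/*
 * Allocate the sense buffer cache used by @shost if it does not exist yet.
 * Hosts that need unchecked ISA DMA get a separate SLAB_CACHE_DMA cache.
 */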
int scsi_init_sense_cache(struct Scsi_Host *shost)
{
	struct kmem_cache *cache;
	int ret = 0;

	mutex_lock(&scsi_sense_cache_mutex);
	cache = scsi_select_sense_cache(shost->unchecked_isa_dma);
	if (cache)
		goto exit;

	if (shost->unchecked_isa_dma) {
		scsi_sense_isadma_cache =
			kmem_cache_create("scsi_sense_cache(DMA)",
				SCSI_SENSE_BUFFERSIZE, 0,
				SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
		if (!scsi_sense_isadma_cache)
			ret = -ENOMEM;
	} else {
		scsi_sense_cache =
			kmem_cache_create_usercopy("scsi_sense_cache",
				SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
				0, SCSI_SENSE_BUFFERSIZE, NULL);
		if (!scsi_sense_cache)
			ret = -ENOMEM;
	}
 exit:
	mutex_unlock(&scsi_sense_cache_mutex);
	return ret;
}

/*
 * When to reinvoke queueing after a resource shortage. It's 3 msecs to
 * not change behaviour from the previous unplug mechanism, experimentation
 * may prove this needs changing.
 */
#define SCSI_QUEUE_DELAY	3

static void
scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
{
	struct Scsi_Host *host = cmd->device->host;
	struct scsi_device *device = cmd->device;
	struct scsi_target *starget = scsi_target(device);

	/*
	 * Set the appropriate busy bit for the device/host.
	 *
	 * If the host/device isn't busy, assume that something actually
	 * completed, and that we should be able to queue a command now.
	 *
	 * Note that the prior mid-layer assumption that any host could
	 * always queue at least one command is now broken.  The mid-layer
	 * will implement a user specifiable stall (see
	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
	 * if a command is requeued with no other commands outstanding
	 * either for the device or for the host.
	 */
	switch (reason) {
	case SCSI_MLQUEUE_HOST_BUSY:
		atomic_set(&host->host_blocked, host->max_host_blocked);
		break;
	case SCSI_MLQUEUE_DEVICE_BUSY:
	case SCSI_MLQUEUE_EH_RETRY:
		atomic_set(&device->device_blocked,
			   device->max_device_blocked);
		break;
	case SCSI_MLQUEUE_TARGET_BUSY:
		atomic_set(&starget->target_blocked,
			   starget->max_target_blocked);
		break;
	}
}

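/*
 * Unprepare @cmd if it has been prepared (RQF_DONTPREP) and put its request
 * back on the queue for resubmission.
 */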
static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
{
	if (cmd->request->rq_flags & RQF_DONTPREP) {
		cmd->request->rq_flags &= ~RQF_DONTPREP;
		scsi_mq_uninit_cmd(cmd);
	} else {
		WARN_ON_ONCE(true);
	}
	blk_mq_requeue_request(cmd->request, true);
}

/**
 * __scsi_queue_insert - private queue insertion
 * @cmd: The SCSI command being requeued
 * @reason:  The reason for the requeue
 * @unbusy: Whether the queue should be unbusied
 *
 * This is a private queue insertion.  The public interface
 * scsi_queue_insert() always assumes the queue should be unbusied
 * because it's always called before the completion.  This function is
 * for a requeue after completion, which should only occur in this
 * file.
 */
static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
{
	struct scsi_device *device = cmd->device;

	SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
		"Inserting command %p into mlqueue\n", cmd));

	scsi_set_blocked(cmd, reason);

	/*
	 * Decrement the counters, since these commands are no longer
	 * active on the host/device.
	 */
	if (unbusy)
		scsi_device_unbusy(device, cmd);

	/*
	 * Requeue this command.  It will go before all other commands
	 * that are already in the queue. Schedule requeue work under
	 * lock such that the kblockd_schedule_work() call happens
	 * before blk_cleanup_queue() finishes.
	 */
	cmd->result = 0;

	blk_mq_requeue_request(cmd->request, true);
}

/**
 * scsi_queue_insert - Reinsert a command in the queue.
 * @cmd:    command that we are adding to queue.
 * @reason: why we are inserting command to queue.
 *
 * We do this for one of two cases. Either the host is busy and it cannot accept
 * any more commands for the time being, or the device returned QUEUE_FULL and
 * can accept no more commands.
 *
 * Context: This could be called either from an interrupt context or a normal
 * process context.
 */
void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
{
	__scsi_queue_insert(cmd, reason, true);
}


/**
 * __scsi_execute - insert request and wait for the result
 * @sdev:	scsi device
 * @cmd:	scsi command
 * @data_direction: data direction
 * @buffer:	data buffer
 * @bufflen:	len of buffer
 * @sense:	optional sense buffer
 * @sshdr:	optional decoded sense header
 * @timeout:	request timeout in seconds
 * @retries:	number of times to retry request
 * @flags:	flags for ->cmd_flags
 * @rq_flags:	flags for ->rq_flags
 * @resid:	optional residual length
 *
 * Returns the scsi_cmnd result field if a command was executed, or a negative
 * Linux error code if we didn't get that far.
 */
int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
		 int data_direction, void *buffer, unsigned bufflen,
		 unsigned char *sense, struct scsi_sense_hdr *sshdr,
		 int timeout, int retries, u64 flags, req_flags_t rq_flags,
		 int *resid)
{
	struct request *req;
	struct scsi_request *rq;
	int ret = DRIVER_ERROR << 24;

	req = blk_get_request(sdev->request_queue,
			data_direction == DMA_TO_DEVICE ?
			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
			rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0);
	if (IS_ERR(req))
		return ret;
	rq = scsi_req(req);

	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
					buffer, bufflen, GFP_NOIO))
		goto out;

	rq->cmd_len = COMMAND_SIZE(cmd[0]);
	memcpy(rq->cmd, cmd, rq->cmd_len);
	rq->retries = retries;
	req->timeout = timeout;
	req->cmd_flags |= flags;
	req->rq_flags |= rq_flags | RQF_QUIET;

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	blk_execute_rq(NULL, req, 1);

	/*
	 * Some devices (USB mass-storage in particular) may transfer
	 * garbage data together with a residue indicating that the data
	 * is invalid.  Prevent the garbage from being misinterpreted
	 * and prevent security leaks by zeroing out the excess data.
	 */
	if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen))
		memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len);

	if (resid)
		*resid = rq->resid_len;
	if (sense && rq->sense_len)
		memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
	if (sshdr)
		scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
	ret = rq->result;
 out:
	blk_put_request(req);

	return ret;
}
EXPORT_SYMBOL(__scsi_execute);

/*
 * Wake up the error handler if necessary. To avoid that the error handler is
 * not woken up when the number of host in-flight requests equals
 * shost->host_failed, scsi_eh_scmd_add() uses call_rcu() and this function
 * takes an RCU read lock, so that this function either finishes in its
 * entirety before scsi_eh_scmd_add() increases the host_failed counter, or
 * it notices the shost state change made by scsi_eh_scmd_add().
 */
static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
{
	unsigned long flags;

	rcu_read_lock();
	__clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
	if (unlikely(scsi_host_in_recovery(shost))) {
		spin_lock_irqsave(shost->host_lock, flags);
		if (shost->host_failed || shost->host_eh_scheduled)
			scsi_eh_wakeup(shost);
		spin_unlock_irqrestore(shost->host_lock, flags);
	}
	rcu_read_unlock();
}

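/*
 * Called when a command has been completed or requeued: drop the host busy
 * count, the target busy count and the device budget taken at dispatch time.
 */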
void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
{
	struct Scsi_Host *shost = sdev->host;
	struct scsi_target *starget = scsi_target(sdev);

	scsi_dec_host_busy(shost, cmd);

	if (starget->can_queue > 0)
		atomic_dec(&starget->target_busy);

	sbitmap_put(&sdev->budget_map, cmd->budget_token);
	cmd->budget_token = -1;
}

static void scsi_kick_queue(struct request_queue *q)
{
	blk_mq_run_hw_queues(q, false);
}

/*
 * Called for single_lun devices on IO completion. Clear starget_sdev_user,
 * and run the queue for all the scsi_devices on the target -
 * including current_sdev first.
 *
 * Called with *no* scsi locks held.
 */
static void scsi_single_lun_run(struct scsi_device *current_sdev)
{
	struct Scsi_Host *shost = current_sdev->host;
	struct scsi_device *sdev, *tmp;
	struct scsi_target *starget = scsi_target(current_sdev);
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	starget->starget_sdev_user = NULL;
	spin_unlock_irqrestore(shost->host_lock, flags);

	/*
	 * Run the queue for all LUNs on the target, starting with
	 * current_sdev. We race with others (to set starget_sdev_user),
	 * but in most cases, we will be first. Ideally, each LU on the
	 * target would get some limited time or requests on the target.
	 */
	scsi_kick_queue(current_sdev->request_queue);

	spin_lock_irqsave(shost->host_lock, flags);
	if (starget->starget_sdev_user)
		goto out;
	list_for_each_entry_safe(sdev, tmp, &starget->devices,
			same_target_siblings) {
		if (sdev == current_sdev)
			continue;
		if (scsi_device_get(sdev))
			continue;

		spin_unlock_irqrestore(shost->host_lock, flags);
		scsi_kick_queue(sdev->request_queue);
		spin_lock_irqsave(shost->host_lock, flags);

		scsi_device_put(sdev);
	}
 out:
	spin_unlock_irqrestore(shost->host_lock, flags);
}

static inline bool scsi_device_is_busy(struct scsi_device *sdev)
{
	if (scsi_device_busy(sdev) >= sdev->queue_depth)
		return true;
	if (atomic_read(&sdev->device_blocked) > 0)
		return true;
	return false;
}

static inline bool scsi_target_is_busy(struct scsi_target *starget)
{
	if (starget->can_queue > 0) {
		if (atomic_read(&starget->target_busy) >= starget->can_queue)
			return true;
		if (atomic_read(&starget->target_blocked) > 0)
			return true;
	}
	return false;
}

static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
{
	if (atomic_read(&shost->host_blocked) > 0)
		return true;
	if (shost->host_self_blocked)
		return true;
	return false;
}

static void scsi_starved_list_run(struct Scsi_Host *shost)
{
	LIST_HEAD(starved_list);
	struct scsi_device *sdev;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	list_splice_init(&shost->starved_list, &starved_list);

	while (!list_empty(&starved_list)) {
		struct request_queue *slq;

		/*
		 * As long as shost is accepting commands and we have
		 * starved queues, run them. scsi_queue_rq() may add us
		 * back to the starved_list.
		 *
		 * host_lock protects the starved_list and starved_entry.
		 * scsi_queue_rq() must get the host_lock before checking
		 * or modifying starved_list or starved_entry.
		 */
		if (scsi_host_is_busy(shost))
			break;

		sdev = list_entry(starved_list.next,
				  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);
		if (scsi_target_is_busy(scsi_target(sdev))) {
			list_move_tail(&sdev->starved_entry,
				       &shost->starved_list);
			continue;
		}

		/*
		 * Once we drop the host lock, a racing scsi_remove_device()
		 * call may remove the sdev from the starved list and destroy
		 * it and the queue.  Mitigate by taking a reference to the
		 * queue and never touching the sdev again after we drop the
		 * host lock.  Note: if __scsi_remove_device() invokes
		 * blk_cleanup_queue() before the queue is run from this
		 * function then blk_run_queue() will return immediately since
		 * blk_cleanup_queue() marks the queue with QUEUE_FLAG_DYING.
		 */
		slq = sdev->request_queue;
		if (!blk_get_queue(slq))
			continue;
		spin_unlock_irqrestore(shost->host_lock, flags);

		scsi_kick_queue(slq);
		blk_put_queue(slq);

		spin_lock_irqsave(shost->host_lock, flags);
	}
	/* put any unprocessed entries back */
	list_splice(&starved_list, &shost->starved_list);
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/**
 * scsi_run_queue - Select a proper request queue to serve next.
 * @q:  last request's queue
477
 *
478
 * The previous command was completely finished, start a new one if possible.
479 480 481 482 483 484 485 486 487
 */
static void scsi_run_queue(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;

	if (scsi_target(sdev)->single_lun)
		scsi_single_lun_run(sdev);
	if (!list_empty(&sdev->host->starved_list))
		scsi_starved_list_run(sdev->host);

	blk_mq_run_hw_queues(q, false);
}

void scsi_requeue_run_queue(struct work_struct *work)
{
	struct scsi_device *sdev;
	struct request_queue *q;

	sdev = container_of(work, struct scsi_device, requeue_work);
	q = sdev->request_queue;
	scsi_run_queue(q);
}

void scsi_run_host_queues(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;

	shost_for_each_device(sdev, shost)
		scsi_run_queue(sdev->request_queue);
}

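/*
 * Give the upper-level driver a chance to undo its ->init_command() work for
 * filesystem (non-passthrough) requests.
 */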
static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
{
	if (!blk_rq_is_passthrough(cmd->request)) {
		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);

		if (drv->uninit_command)
			drv->uninit_command(cmd);
	}
}

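/* Free the (possibly chained) data and integrity scatterlist tables of @cmd. */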
void scsi_free_sgtables(struct scsi_cmnd *cmd)
{
	if (cmd->sdb.table.nents)
		sg_free_table_chained(&cmd->sdb.table,
				SCSI_INLINE_SG_CNT);
	if (scsi_prot_sg_count(cmd))
		sg_free_table_chained(&cmd->prot_sdb->table,
				SCSI_INLINE_PROT_SG_CNT);
}
EXPORT_SYMBOL_GPL(scsi_free_sgtables);

static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
{
	scsi_free_sgtables(cmd);
	scsi_uninit_cmd(cmd);
}

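/*
 * Restart queue processing after a command released its budget.  Defer to
 * kblockd when other queues on the host may also have to be run (single-LUN
 * target or a non-empty starved list).
 */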
static void scsi_run_queue_async(struct scsi_device *sdev)
{
	if (scsi_target(sdev)->single_lun ||
	    !list_empty(&sdev->host->starved_list)) {
		kblockd_schedule_work(&sdev->requeue_work);
	} else {
		/*
		 * smp_mb() present in sbitmap_queue_clear() or implied in
		 * .end_io is for ordering writing .device_busy in
		 * scsi_device_unbusy() and reading sdev->restarts.
		 */
		int old = atomic_read(&sdev->restarts);

		/*
		 * ->restarts has to be kept as non-zero if new budget
		 *  contention occurs.
		 *
		 *  No need to run queue when either another re-run
		 *  queue wins in updating ->restarts or a new budget
		 *  contention occurs.
		 */
		if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)
			blk_mq_run_hw_queues(sdev->request_queue, true);
	}
}

/* Returns false when no more bytes to process, true if there are more */
static bool scsi_end_request(struct request *req, blk_status_t error,
		unsigned int bytes)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	struct scsi_device *sdev = cmd->device;
	struct request_queue *q = sdev->request_queue;

	if (blk_update_request(req, error, bytes))
		return true;

	if (blk_queue_add_random(q))
		add_disk_randomness(req->rq_disk);

	if (!blk_rq_is_scsi(req)) {
		WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
		cmd->flags &= ~SCMD_INITIALIZED;
	}

	/*
	 * Calling rcu_barrier() is not necessary here because the
	 * SCSI error handler guarantees that the function called by
	 * call_rcu() has been called before scsi_end_request() is
	 * called.
	 */
	destroy_rcu_head(&cmd->rcu);

	/*
	 * In the MQ case the command gets freed by __blk_mq_end_request,
	 * so we have to do all cleanup that depends on it earlier.
	 *
	 * We also can't kick the queues from irq context, so we
	 * will have to defer it to a workqueue.
	 */
	scsi_mq_uninit_cmd(cmd);

	/*
	 * queue is still alive, so grab the ref for preventing it
	 * from being cleaned up during running queue.
	 */
	percpu_ref_get(&q->q_usage_counter);

	__blk_mq_end_request(req, error);

	scsi_run_queue_async(sdev);

	percpu_ref_put(&q->q_usage_counter);
	return false;
}

/**
 * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t
 * @cmd:	SCSI command
 * @result:	scsi error code
 *
 * Translate a SCSI result code into a blk_status_t value. May reset the host
 * byte of @cmd->result.
 */
static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result)
{
	switch (host_byte(result)) {
	case DID_OK:
		/*
		 * Also check the other bytes than the status byte in result
		 * to handle the case when a SCSI LLD sets result to
		 * DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
		 */
		if (scsi_status_is_good(result) && (result & ~0xff) == 0)
			return BLK_STS_OK;
		return BLK_STS_IOERR;
	case DID_TRANSPORT_FAILFAST:
	case DID_TRANSPORT_MARGINAL:
		return BLK_STS_TRANSPORT;
	case DID_TARGET_FAILURE:
		set_host_byte(cmd, DID_OK);
		return BLK_STS_TARGET;
	case DID_NEXUS_FAILURE:
		set_host_byte(cmd, DID_OK);
		return BLK_STS_NEXUS;
	case DID_ALLOC_FAILURE:
		set_host_byte(cmd, DID_OK);
		return BLK_STS_NOSPC;
	case DID_MEDIUM_ERROR:
		set_host_byte(cmd, DID_OK);
		return BLK_STS_MEDIUM;
	default:
		return BLK_STS_IOERR;
	}
}

/* Helper for scsi_io_completion() when "reprep" action required. */
static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
				      struct request_queue *q)
{
	/* A new command will be prepared and issued. */
	scsi_mq_requeue_cmd(cmd);
}

static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
{
	struct request *req = cmd->request;
	unsigned long wait_for;

	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
		return false;

	wait_for = (cmd->allowed + 1) * req->timeout;
	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
			    wait_for/HZ);
		return true;
	}
	return false;
}

/* Helper for scsi_io_completion() when special action required. */
static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
{
	struct request_queue *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	int level = 0;
	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
	      ACTION_DELAYED_RETRY} action;
	struct scsi_sense_hdr sshdr;
	bool sense_valid;
	bool sense_current = true;      /* false implies "deferred sense" */
	blk_status_t blk_stat;

	sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
	if (sense_valid)
		sense_current = !scsi_sense_is_deferred(&sshdr);

	blk_stat = scsi_result_to_blk_status(cmd, result);

	if (host_byte(result) == DID_RESET) {
		/* Third party bus reset or reset for error recovery
		 * reasons.  Just retry the command and see what
		 * happens.
		 */
		action = ACTION_RETRY;
	} else if (sense_valid && sense_current) {
		switch (sshdr.sense_key) {
		case UNIT_ATTENTION:
			if (cmd->device->removable) {
				/* Detected disc change.  Set a bit
				 * and quietly refuse further access.
				 */
				cmd->device->changed = 1;
				action = ACTION_FAIL;
			} else {
				/* Must have been a power glitch, or a
				 * bus reset.  Could not have been a
				 * media change, so we just retry the
				 * command and see what happens.
				 */
				action = ACTION_RETRY;
			}
			break;
		case ILLEGAL_REQUEST:
			/* If we had an ILLEGAL REQUEST returned, then
			 * we may have performed an unsupported
			 * command.  The only thing this should be
			 * would be a ten byte read where only a six
			 * byte read was supported.  Also, on a system
			 * where READ CAPACITY failed, we may have
			 * read past the end of the disk.
			 */
			if ((cmd->device->use_10_for_rw &&
			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
			    (cmd->cmnd[0] == READ_10 ||
			     cmd->cmnd[0] == WRITE_10)) {
				/* This will issue a new 6-byte command. */
				cmd->device->use_10_for_rw = 0;
				action = ACTION_REPREP;
			} else if (sshdr.asc == 0x10) /* DIX */ {
				action = ACTION_FAIL;
				blk_stat = BLK_STS_PROTECTION;
			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
				action = ACTION_FAIL;
				blk_stat = BLK_STS_TARGET;
			} else
				action = ACTION_FAIL;
			break;
		case ABORTED_COMMAND:
			action = ACTION_FAIL;
			if (sshdr.asc == 0x10) /* DIF */
				blk_stat = BLK_STS_PROTECTION;
			break;
		case NOT_READY:
			/* If the device is in the process of becoming
			 * ready, or has a temporary blockage, retry.
			 */
			if (sshdr.asc == 0x04) {
				switch (sshdr.ascq) {
				case 0x01: /* becoming ready */
				case 0x04: /* format in progress */
				case 0x05: /* rebuild in progress */
				case 0x06: /* recalculation in progress */
				case 0x07: /* operation in progress */
				case 0x08: /* Long write in progress */
				case 0x09: /* self test in progress */
				case 0x14: /* space allocation in progress */
				case 0x1a: /* start stop unit in progress */
				case 0x1b: /* sanitize in progress */
				case 0x1d: /* configuration in progress */
				case 0x24: /* depopulation in progress */
					action = ACTION_DELAYED_RETRY;
					break;
				case 0x0a: /* ALUA state transition */
					blk_stat = BLK_STS_AGAIN;
					fallthrough;
				default:
					action = ACTION_FAIL;
					break;
				}
			} else
				action = ACTION_FAIL;
			break;
		case VOLUME_OVERFLOW:
			/* See SSC3rXX or current. */
			action = ACTION_FAIL;
			break;
		case DATA_PROTECT:
			action = ACTION_FAIL;
			if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
			    (sshdr.asc == 0x55 &&
			     (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
				/* Insufficient zone resources */
				blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
			}
			break;
		default:
			action = ACTION_FAIL;
			break;
		}
	} else
		action = ACTION_FAIL;

	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
		action = ACTION_FAIL;

	switch (action) {
	case ACTION_FAIL:
		/* Give up and fail the remainder of the request */
		if (!(req->rq_flags & RQF_QUIET)) {
			static DEFINE_RATELIMIT_STATE(_rs,
					DEFAULT_RATELIMIT_INTERVAL,
					DEFAULT_RATELIMIT_BURST);

			if (unlikely(scsi_logging_level))
				level =
				     SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT,
						    SCSI_LOG_MLCOMPLETE_BITS);

			/*
			 * if logging is enabled the failure will be printed
			 * in scsi_log_completion(), so avoid duplicate messages
			 */
			if (!level && __ratelimit(&_rs)) {
				scsi_print_result(cmd, NULL, FAILED);
				if (driver_byte(result) == DRIVER_SENSE)
					scsi_print_sense(cmd);
				scsi_print_command(cmd);
			}
		}
		if (!scsi_end_request(req, blk_stat, blk_rq_err_bytes(req)))
			return;
		fallthrough;
	case ACTION_REPREP:
		scsi_io_completion_reprep(cmd, q);
		break;
	case ACTION_RETRY:
		/* Retry the same command immediately */
		__scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false);
		break;
	case ACTION_DELAYED_RETRY:
		/* Retry the same command after a delay */
		__scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false);
		break;
	}
}

/*
 * Helper for scsi_io_completion() when cmd->result is non-zero. Returns a
 * new result that may suppress further error checking. Also modifies
 * *blk_statp in some cases.
 */
static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
					blk_status_t *blk_statp)
{
	bool sense_valid;
	bool sense_current = true;	/* false implies "deferred sense" */
	struct request *req = cmd->request;
	struct scsi_sense_hdr sshdr;

	sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
	if (sense_valid)
		sense_current = !scsi_sense_is_deferred(&sshdr);

	if (blk_rq_is_passthrough(req)) {
		if (sense_valid) {
			/*
			 * SG_IO wants current and deferred errors
			 */
			scsi_req(req)->sense_len =
				min(8 + cmd->sense_buffer[7],
				    SCSI_SENSE_BUFFERSIZE);
		}
		if (sense_current)
			*blk_statp = scsi_result_to_blk_status(cmd, result);
	} else if (blk_rq_bytes(req) == 0 && sense_current) {
		/*
		 * Flush commands do not transfer any data, and thus cannot use
		 * good_bytes != blk_rq_bytes(req) as the signal for an error.
		 * This sets *blk_statp explicitly for the problem case.
		 */
		*blk_statp = scsi_result_to_blk_status(cmd, result);
	}
	/*
	 * Recovered errors need reporting, but they're always treated as
	 * success, so fiddle the result code here.  For passthrough requests
	 * we already took a copy of the original into sreq->result which
	 * is what gets returned to the user
	 */
	if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
		bool do_print = true;
		/*
		 * if ATA PASS-THROUGH INFORMATION AVAILABLE [0x0, 0x1d]
		 * skip print since caller wants ATA registers. Only occurs
		 * on SCSI ATA PASS_THROUGH commands when CK_COND=1
		 */
		if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d))
			do_print = false;
		else if (req->rq_flags & RQF_QUIET)
			do_print = false;
		if (do_print)
			scsi_print_sense(cmd);
		result = 0;
		/* for passthrough, *blk_statp may be set */
		*blk_statp = BLK_STS_OK;
	}
	/*
	 * Another corner case: the SCSI status byte is non-zero but 'good'.
	 * Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when
	 * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD
	 * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related
	 * intermediate statuses (both obsolete in SAM-4) as good.
	 */
	if (status_byte(result) && scsi_status_is_good(result)) {
		result = 0;
		*blk_statp = BLK_STS_OK;
	}
	return result;
}

/**
 * scsi_io_completion - Completion processing for SCSI commands.
 * @cmd:	command that is finished.
 * @good_bytes:	number of processed bytes.
 *
 * We will finish off the specified number of sectors. If we are done, the
 * command block will be released and the queue function will be goosed. If we
 * are not done then we have to figure out what to do next:
 *
 *   a) We can call scsi_io_completion_reprep().  The request will be
 *	unprepared and put back on the queue.  Then a new command will
 *	be created for it.  This should be used if we made forward
 *	progress, or if we want to switch from READ(10) to READ(6) for
 *	example.
 *
 *   b) We can call scsi_io_completion_action().  The request will be
 *	put back on the queue and retried using the same command as
 *	before, possibly after a delay.
 *
 *   c) We can call scsi_end_request() with blk_stat other than
 *	BLK_STS_OK, to fail the remainder of the request.
 */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
{
	int result = cmd->result;
	struct request_queue *q = cmd->device->request_queue;
	struct request *req = cmd->request;
	blk_status_t blk_stat = BLK_STS_OK;

	if (unlikely(result))	/* a nz result may or may not be an error */
		result = scsi_io_completion_nz_result(cmd, result, &blk_stat);

	if (unlikely(blk_rq_is_passthrough(req))) {
		/*
		 * scsi_result_to_blk_status may have reset the host_byte
		 */
		scsi_req(req)->result = cmd->result;
	}

	/*
	 * Next deal with any sectors which we were able to correctly
	 * handle.
	 */
	SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd,
		"%u sectors total, %d bytes done.\n",
		blk_rq_sectors(req), good_bytes));

	/*
	 * Failed, zero length commands always need to drop down
	 * to retry code. Fast path should return in this block.
	 */
	if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) {
		if (likely(!scsi_end_request(req, blk_stat, good_bytes)))
			return; /* no bytes remaining */
	}

	/* Kill remainder if no retries. */
	if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) {
		if (scsi_end_request(req, blk_stat, blk_rq_bytes(req)))
			WARN_ONCE(true,
			    "Bytes remaining after failed, no-retry command");
		return;
	}

	/*
	 * If there had been no error, but we have leftover bytes in the
	 * request, just queue the command up again.
	 */
	if (likely(result == 0))
		scsi_io_completion_reprep(cmd, q);
	else
		scsi_io_completion_action(cmd, result);
}

static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
		struct request *rq)
{
	return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
	       !op_is_write(req_op(rq)) &&
	       sdev->host->hostt->dma_need_drain(rq);
}

/**
 * scsi_alloc_sgtables - Allocate and initialize data and integrity scatterlists
 * @cmd: SCSI command data structure to initialize.
 *
 * Initializes @cmd->sdb and also @cmd->prot_sdb if data integrity is enabled
 * for @cmd.
1007
 *
1008 1009 1010 1011
 * Returns:
 * * BLK_STS_OK       - on success
 * * BLK_STS_RESOURCE - if the failure is retryable
 * * BLK_STS_IOERR    - if the failure is fatal
1012
 */
1013
blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
1014
{
1015
	struct scsi_device *sdev = cmd->device;
1016
	struct request *rq = cmd->request;
1017 1018
	unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
	struct scatterlist *last_sg = NULL;
1019
	blk_status_t ret;
1020
	bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
1021
	int count;
1022

1023
	if (WARN_ON_ONCE(!nr_segs))
1024
		return BLK_STS_IOERR;
1025

1026 1027 1028 1029 1030 1031 1032
	/*
	 * Make sure there is space for the drain.  The driver must adjust
	 * max_hw_segments to be prepared for this.
	 */
	if (need_drain)
		nr_segs++;

	/*
	 * If sg table allocation fails, requeue request later.
	 */
	if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
			cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
		return BLK_STS_RESOURCE;

	/*
	 * Next, walk the list, and fill in the addresses and sizes of
	 * each segment.
	 */
	count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);

	if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
		unsigned int pad_len =
			(rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		last_sg->length += pad_len;
		cmd->extra_len += pad_len;
	}

	if (need_drain) {
		sg_unmark_end(last_sg);
		last_sg = sg_next(last_sg);
		sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
		sg_mark_end(last_sg);

		cmd->extra_len += sdev->dma_drain_len;
		count++;
	}

	BUG_ON(count > cmd->sdb.table.nents);
	cmd->sdb.table.nents = count;
	cmd->sdb.length = blk_rq_payload_bytes(rq);

	if (blk_integrity_rq(rq)) {
		struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
		int ivecs;

		if (WARN_ON_ONCE(!prot_sdb)) {
			/*
			 * This can happen if someone (e.g. multipath)
			 * queues a command to a device on an adapter
			 * that does not support DIX.
			 */
			ret = BLK_STS_IOERR;
			goto out_free_sgtables;
		}

		ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);

		if (sg_alloc_table_chained(&prot_sdb->table, ivecs,
				prot_sdb->table.sgl,
				SCSI_INLINE_PROT_SG_CNT)) {
			ret = BLK_STS_RESOURCE;
			goto out_free_sgtables;
		}

		count = blk_rq_map_integrity_sg(rq->q, rq->bio,
						prot_sdb->table.sgl);
		BUG_ON(count > ivecs);
		BUG_ON(count > queue_max_integrity_segments(rq->q));

		cmd->prot_sdb = prot_sdb;
		cmd->prot_sdb->table.nents = count;
	}

	return BLK_STS_OK;
out_free_sgtables:
	scsi_free_sgtables(cmd);
	return ret;
}
EXPORT_SYMBOL(scsi_alloc_sgtables);

/**
 * scsi_initialize_rq - initialize struct scsi_cmnd partially
 * @rq: Request associated with the SCSI command to be initialized.
 *
 * This function initializes the members of struct scsi_cmnd that must be
 * initialized before request processing starts and that won't be
 * reinitialized if a SCSI command is requeued.
 *
 * Called from inside blk_get_request() for pass-through requests and from
 * inside scsi_init_command() for filesystem requests.
 */
static void scsi_initialize_rq(struct request *rq)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);

	scsi_req_init(&cmd->req);
	init_rcu_head(&cmd->rcu);
	cmd->jiffies_at_alloc = jiffies;
	cmd->retries = 0;
}

/*
 * Only called when the request isn't completed by SCSI, and not freed by
 * SCSI
 */
static void scsi_cleanup_rq(struct request *rq)
{
	if (rq->rq_flags & RQF_DONTPREP) {
		scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq));
		rq->rq_flags &= ~RQF_DONTPREP;
	}
}

/* Called before a request is prepared. See also scsi_prepare_cmd(). */
void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
{
	void *buf = cmd->sense_buffer;
	void *prot = cmd->prot_sdb;
	struct request *rq = blk_mq_rq_from_pdu(cmd);
	unsigned int flags = cmd->flags & SCMD_PRESERVED_FLAGS;
	unsigned long jiffies_at_alloc;
	int retries, to_clear;
	bool in_flight;
	int budget_token = cmd->budget_token;

	if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
		flags |= SCMD_INITIALIZED;
		scsi_initialize_rq(rq);
	}

	jiffies_at_alloc = cmd->jiffies_at_alloc;
	retries = cmd->retries;
	in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
	/*
	 * Zero out the cmd, except for the embedded scsi_request. Only clear
	 * the driver-private command data if the LLD does not supply a
	 * function to initialize that data.
	 */
	to_clear = sizeof(*cmd) - sizeof(cmd->req);
	if (!dev->host->hostt->init_cmd_priv)
		to_clear += dev->host->hostt->cmd_size;
	memset((char *)cmd + sizeof(cmd->req), 0, to_clear);

	cmd->device = dev;
	cmd->sense_buffer = buf;
	cmd->prot_sdb = prot;
	cmd->flags = flags;
	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
	cmd->jiffies_at_alloc = jiffies_at_alloc;
	cmd->retries = retries;
	if (in_flight)
		__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
	cmd->budget_token = budget_token;

}

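/*
 * Set up a passthrough command: build the scatterlist for an attached bio,
 * if any, and copy the CDB, retry count and transfer length supplied by the
 * caller through the scsi_request.
 */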
static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev,
		struct request *req)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);

	/*
	 * Passthrough requests may transfer data, in which case they must have
	 * a bio attached to them.  Or they might contain a SCSI command
	 * that does not transfer data, in which case they may optionally
	 * submit a request without an attached bio.
	 */
	if (req->bio) {
		blk_status_t ret = scsi_alloc_sgtables(cmd);
		if (unlikely(ret != BLK_STS_OK))
			return ret;
	} else {
		BUG_ON(blk_rq_bytes(req));

		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
	}

	cmd->cmd_len = scsi_req(req)->cmd_len;
	if (cmd->cmd_len == 0)
		cmd->cmd_len = scsi_command_size(cmd->cmnd);
	cmd->cmnd = scsi_req(req)->cmd;
	cmd->transfersize = blk_rq_bytes(req);
	cmd->allowed = scsi_req(req)->retries;
	return BLK_STS_OK;
}

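/*
 * Check whether @req may be sent to @sdev in its current state: BLK_STS_OK to
 * dispatch, BLK_STS_RESOURCE to retry later, BLK_STS_IOERR to fail it.
 */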
static blk_status_t
scsi_device_state_check(struct scsi_device *sdev, struct request *req)
{
	switch (sdev->sdev_state) {
	case SDEV_CREATED:
		return BLK_STS_OK;
	case SDEV_OFFLINE:
	case SDEV_TRANSPORT_OFFLINE:
		/*
		 * If the device is offline we refuse to process any
		 * commands.  The device must be brought online
		 * before trying any recovery commands.
		 */
		if (!sdev->offline_already) {
			sdev->offline_already = true;
			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to offline device\n");
		}
		return BLK_STS_IOERR;
	case SDEV_DEL:
		/*
		 * If the device is fully deleted, we refuse to
		 * process any commands as well.
		 */
		sdev_printk(KERN_ERR, sdev,
			    "rejecting I/O to dead device\n");
		return BLK_STS_IOERR;
	case SDEV_BLOCK:
	case SDEV_CREATED_BLOCK:
		return BLK_STS_RESOURCE;
	case SDEV_QUIESCE:
		/*
		 * If the device is blocked we only accept power management
		 * commands.
		 */
		if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM)))
			return BLK_STS_RESOURCE;
		return BLK_STS_OK;
	default:
		/*
		 * For any other not fully online state we only allow
		 * power management commands.
		 */
		if (req && !(req->rq_flags & RQF_PM))
			return BLK_STS_IOERR;
		return BLK_STS_OK;
	}
}

/*
 * scsi_dev_queue_ready: if we can send requests to sdev, assign one token
 * and return the token else return -1.
 */
static inline int scsi_dev_queue_ready(struct request_queue *q,
				  struct scsi_device *sdev)
{
	int token;

	token = sbitmap_get(&sdev->budget_map);
	if (atomic_read(&sdev->device_blocked)) {
		if (token < 0)
			goto out;

		if (scsi_device_busy(sdev) > 1)
			goto out_dec;

		/*
		 * unblock after device_blocked iterates to zero
		 */
		if (atomic_dec_return(&sdev->device_blocked) > 0)
			goto out_dec;
		SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
				   "unblocking device at zero depth\n"));
	}

	return token;
out_dec:
	if (token >= 0)
		sbitmap_put(&sdev->budget_map, token);
out:
	return -1;
}

/*
 * scsi_target_queue_ready: checks if there we can send commands to target
 * @sdev: scsi device on starget to check.
 */
static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
					   struct scsi_device *sdev)
{
	struct scsi_target *starget = scsi_target(sdev);
	unsigned int busy;

	if (starget->single_lun) {
		spin_lock_irq(shost->host_lock);
		if (starget->starget_sdev_user &&
		    starget->starget_sdev_user != sdev) {
			spin_unlock_irq(shost->host_lock);
			return 0;
		}
		starget->starget_sdev_user = sdev;
		spin_unlock_irq(shost->host_lock);
	}

	if (starget->can_queue <= 0)
		return 1;

	busy = atomic_inc_return(&starget->target_busy) - 1;
	if (atomic_read(&starget->target_blocked) > 0) {
		if (busy)
			goto starved;

		/*
		 * unblock after target_blocked iterates to zero
		 */
		if (atomic_dec_return(&starget->target_blocked) > 0)
			goto out_dec;

		SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
				 "unblocking target at zero depth\n"));
	}

	if (busy >= starget->can_queue)
		goto starved;

	return 1;

starved:
	spin_lock_irq(shost->host_lock);
	list_move_tail(&sdev->starved_entry, &shost->starved_list);
	spin_unlock_irq(shost->host_lock);
out_dec:
	if (starget->can_queue > 0)
		atomic_dec(&starget->target_busy);
	return 0;
}

/*
 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
 * return 0. We must end up running the queue again whenever 0 is
 * returned, else IO can hang.
 */
static inline int scsi_host_queue_ready(struct request_queue *q,
				   struct Scsi_Host *shost,
				   struct scsi_device *sdev,
				   struct scsi_cmnd *cmd)
{
	if (scsi_host_in_recovery(shost))
		return 0;

	if (atomic_read(&shost->host_blocked) > 0) {
		if (scsi_host_busy(shost) > 0)
			goto starved;

		/*
		 * unblock after host_blocked iterates to zero
		 */
		if (atomic_dec_return(&shost->host_blocked) > 0)
			goto out_dec;

		SCSI_LOG_MLQUEUE(3,
			shost_printk(KERN_INFO, shost,
				     "unblocking host at zero depth\n"));
	}

	if (shost->host_self_blocked)
		goto starved;

	/* We're OK to process the command, so we can't be starved */
1382 1383 1384 1385 1386 1387
	if (!list_empty(&sdev->starved_entry)) {
		spin_lock_irq(shost->host_lock);
		if (!list_empty(&sdev->starved_entry))
			list_del_init(&sdev->starved_entry);
		spin_unlock_irq(shost->host_lock);
	}

	__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);

	return 1;

starved:
	spin_lock_irq(shost->host_lock);
	if (list_empty(&sdev->starved_entry))
		list_add_tail(&sdev->starved_entry, &shost->starved_list);
	spin_unlock_irq(shost->host_lock);
out_dec:
	scsi_dec_host_busy(shost, cmd);
	return 0;
}

/*
 * Busy state exporting function for request stacking drivers.
 *
 * For efficiency, no lock is taken to check the busy state of
 * shost/starget/sdev, since the returned value is not guaranteed and
 * may be changed after request stacking drivers call the function,
 * regardless of taking lock or not.
 *
 * When scsi can't dispatch I/Os anymore and needs to kill I/Os, scsi
 * needs to return 'not busy'. Otherwise, request stacking drivers
 * may hold requests forever.
 */
static bool scsi_mq_lld_busy(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;

	if (blk_queue_dying(q))
		return false;

	shost = sdev->host;

	/*
	 * Ignore host/starget busy state.
	 * Since block layer does not have a concept of fairness across
	 * multiple queues, congestion of host/starget needs to be handled
	 * in SCSI layer.
	 */
	if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev))
		return true;

	return false;
}

/*
 * Block layer request completion callback. May be called from interrupt
 * context.
 */
static void scsi_complete(struct request *rq)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
	int disposition;

	INIT_LIST_HEAD(&cmd->eh_entry);

	atomic_inc(&cmd->device->iodone_cnt);
	if (cmd->result)
		atomic_inc(&cmd->device->ioerr_cnt);

	disposition = scsi_decide_disposition(cmd);
	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
		disposition = SUCCESS;

	scsi_log_completion(cmd, disposition);

	switch (disposition) {
	case SUCCESS:
		scsi_finish_command(cmd);
		break;
	case NEEDS_RETRY:
		scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
		break;
	case ADD_TO_MLQUEUE:
		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
		break;
	default:
		scsi_eh_scmd_add(cmd);
		break;
	}
}

/**
 * scsi_dispatch_cmd - Dispatch a command to the low-level driver.
 * @cmd: command block we are dispatching.
 *
 * Return: nonzero if the request was rejected and the device's queue needs to be
 * plugged.
 */
static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	int rtn = 0;

	atomic_inc(&cmd->device->iorequest_cnt);

	/* check if the device is still usable */
	if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
		/* in SDEV_DEL we error all commands. DID_NO_CONNECT
		 * returns an immediate error upwards, and signals
		 * that the device is no longer present */
		cmd->result = DID_NO_CONNECT << 16;
		goto done;
	}

	/* Check to see if the scsi lld made this device blocked. */
	if (unlikely(scsi_device_blocked(cmd->device))) {
		/*
		 * in blocked state, the command is just put back on
		 * the device queue.  The suspend state has already
		 * blocked the queue so future requests should not
		 * occur until the device transitions out of the
		 * suspend state.
		 */
		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			"queuecommand : device blocked\n"));
		return SCSI_MLQUEUE_DEVICE_BUSY;
	}

	/* Store the LUN value in cmnd, if needed. */
	if (cmd->device->lun_in_cdb)
		cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
			       (cmd->device->lun << 5 & 0xe0);

	scsi_log_send(cmd);

	/*
	 * Before we queue this command, check if the command
	 * length exceeds what the host adapter can handle.
	 */
	if (cmd->cmd_len > cmd->device->host->max_cmd_len) {
		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			       "queuecommand : command too long. "
			       "cdb_size=%d host->max_cmd_len=%d\n",
			       cmd->cmd_len, cmd->device->host->max_cmd_len));
		cmd->result = (DID_ABORT << 16);
		goto done;
	}

	if (unlikely(host->shost_state == SHOST_DEL)) {
		cmd->result = (DID_NO_CONNECT << 16);
		goto done;

	}

	trace_scsi_dispatch_cmd_start(cmd);
	rtn = host->hostt->queuecommand(host, cmd);
	if (rtn) {
		trace_scsi_dispatch_cmd_error(cmd, rtn);
		if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
		    rtn != SCSI_MLQUEUE_TARGET_BUSY)
			rtn = SCSI_MLQUEUE_HOST_BUSY;

		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			"queuecommand : request rejected\n"));
	}

	return rtn;
 done:
	cmd->scsi_done(cmd);
	return 0;
}

/* Size in bytes of the sg-list stored in the scsi-mq command-private data. */
static unsigned int scsi_mq_inline_sgl_size(struct Scsi_Host *shost)
{
	return min_t(unsigned int, shost->sg_tablesize, SCSI_INLINE_SG_CNT) *
		sizeof(struct scatterlist);
}

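/*
 * Prepare a request for dispatch: initialize the scsi_cmnd, point it at the
 * inline scatterlists and build the CDB, via the ULD's ->init_command() for
 * filesystem requests or directly from the scsi_request for passthrough.
 */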
static blk_status_t scsi_prepare_cmd(struct request *req)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	struct scsi_device *sdev = req->q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	struct scatterlist *sg;

	scsi_init_command(sdev, cmd);

	cmd->request = req;
	cmd->tag = req->tag;
	cmd->prot_op = SCSI_PROT_NORMAL;
	if (blk_rq_bytes(req))
		cmd->sc_data_direction = rq_dma_dir(req);
	else
		cmd->sc_data_direction = DMA_NONE;

	sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
	cmd->sdb.table.sgl = sg;

	if (scsi_host_get_prot(shost)) {
		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));

		cmd->prot_sdb->table.sgl =
			(struct scatterlist *)(cmd->prot_sdb + 1);
	}

	/*
	 * Special handling for passthrough commands, which don't go to the ULP
	 * at all:
	 */
	if (blk_rq_is_scsi(req))
		return scsi_setup_scsi_cmnd(sdev, req);

	if (sdev->handler && sdev->handler->prep_fn) {
		blk_status_t ret = sdev->handler->prep_fn(sdev, req);

		if (ret != BLK_STS_OK)
			return ret;
	}

	cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd;
	memset(cmd->cmnd, 0, BLK_MAX_CDB);
	return scsi_cmd_to_driver(cmd)->init_command(cmd);
}

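/*
 * Completion callback handed to the LLD: complete the request through blk-mq
 * unless a timeout is being faked or the command has already been completed.
 */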
static void scsi_mq_done(struct scsi_cmnd *cmd)
{
	if (unlikely(blk_should_fake_timeout(cmd->request->q)))
		return;
	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
		return;
	trace_scsi_dispatch_cmd_done(cmd);
	blk_mq_complete_request(cmd->request);
}

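/* Return a device budget token obtained from scsi_mq_get_budget(). */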
static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
{
	struct scsi_device *sdev = q->queuedata;

	sbitmap_put(&sdev->budget_map, budget_token);
}

static int scsi_mq_get_budget(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	int token = scsi_dev_queue_ready(q, sdev);

	if (token >= 0)
		return token;

	atomic_inc(&sdev->restarts);

	/*
	 * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).
	 * .restarts must be incremented before .device_busy is read because the
	 * code in scsi_run_queue_async() depends on the order of these operations.
	 */
	smp_mb__after_atomic();

	/*
	 * If all in-flight requests originated from this LUN are completed
	 * before reading .device_busy, sdev->device_busy will be observed as
	 * zero, then blk_mq_delay_run_hw_queues() will dispatch this request
	 * soon. Otherwise, completion of one of these requests will observe
	 * the .restarts flag, and the request queue will be run for handling
	 * this request, see scsi_end_request().
	 */
	if (unlikely(scsi_device_busy(sdev) == 0 &&
				!scsi_device_blocked(sdev)))
		blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
	return -1;
}

static void scsi_mq_set_rq_budget_token(struct request *req, int token)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);

	cmd->budget_token = token;
}

static int scsi_mq_get_rq_budget_token(struct request *req)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);

	return cmd->budget_token;
}

static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;
	struct request_queue *q = req->q;
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	blk_status_t ret;
	int reason;

	WARN_ON_ONCE(cmd->budget_token < 0);

	/*
	 * If the device is not in running state we will reject some or all
	 * commands.
	 */
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		ret = scsi_device_state_check(sdev, req);
		if (ret != BLK_STS_OK)
			goto out_put_budget;
	}

	ret = BLK_STS_RESOURCE;
	if (!scsi_target_queue_ready(shost, sdev))
		goto out_put_budget;
	if (!scsi_host_queue_ready(q, shost, sdev, cmd))
		goto out_dec_target_busy;

	if (!(req->rq_flags & RQF_DONTPREP)) {
		ret = scsi_prepare_cmd(req);
		if (ret != BLK_STS_OK)
			goto out_dec_host_busy;
		req->rq_flags |= RQF_DONTPREP;
	} else {
		clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
	}

	cmd->flags &= SCMD_PRESERVED_FLAGS;
	if (sdev->simple_tags)
		cmd->flags |= SCMD_TAGGED;
	if (bd->last)
		cmd->flags |= SCMD_LAST;

	scsi_set_resid(cmd, 0);
	memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
	cmd->scsi_done = scsi_mq_done;

	blk_mq_start_request(req);
	reason = scsi_dispatch_cmd(cmd);
	if (reason) {
		scsi_set_blocked(cmd, reason);
		ret = BLK_STS_RESOURCE;
		goto out_dec_host_busy;
	}

	return BLK_STS_OK;

out_dec_host_busy:
	scsi_dec_host_busy(shost, cmd);
out_dec_target_busy:
	if (scsi_target(sdev)->can_queue > 0)
		atomic_dec(&scsi_target(sdev)->target_busy);
out_put_budget:
	scsi_mq_put_budget(q, cmd->budget_token);
	cmd->budget_token = -1;
	switch (ret) {
	case BLK_STS_OK:
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_ZONE_RESOURCE:
		if (scsi_device_blocked(sdev))
			ret = BLK_STS_DEV_RESOURCE;
		break;
	case BLK_STS_AGAIN:
		scsi_req(req)->result = DID_BUS_BUSY << 16;
		if (req->rq_flags & RQF_DONTPREP)
			scsi_mq_uninit_cmd(cmd);
		break;
	default:
		if (unlikely(!scsi_device_online(sdev)))
			scsi_req(req)->result = DID_NO_CONNECT << 16;
		else
			scsi_req(req)->result = DID_ERROR << 16;
		/*
		 * Make sure to release all allocated resources when
		 * we hit an error, as we will never see this command
		 * again.
		 */
		if (req->rq_flags & RQF_DONTPREP)
			scsi_mq_uninit_cmd(cmd);
		scsi_run_queue_async(sdev);
		break;
	}
	return ret;
}

static enum blk_eh_timer_return scsi_timeout(struct request *req,
		bool reserved)
{
	if (reserved)
		return BLK_EH_RESET_TIMER;
	return scsi_times_out(req);
}

static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
				unsigned int hctx_idx, unsigned int numa_node)
{
	struct Scsi_Host *shost = set->driver_data;
	const bool unchecked_isa_dma = shost->unchecked_isa_dma;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
	struct scatterlist *sg;
	int ret = 0;

	if (unchecked_isa_dma)
		cmd->flags |= SCMD_UNCHECKED_ISA_DMA;
	cmd->sense_buffer = scsi_alloc_sense_buffer(unchecked_isa_dma,
						    GFP_KERNEL, numa_node);
	if (!cmd->sense_buffer)
		return -ENOMEM;
	cmd->req.sense = cmd->sense_buffer;

	if (scsi_host_get_prot(shost)) {
		sg = (void *)cmd + sizeof(struct scsi_cmnd) +
			shost->hostt->cmd_size;
		cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost);
	}

	if (shost->hostt->init_cmd_priv) {
		ret = shost->hostt->init_cmd_priv(shost, cmd);
		if (ret < 0)
			scsi_free_sense_buffer(unchecked_isa_dma,
					       cmd->sense_buffer);
	}

	return ret;
}

static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq,
				 unsigned int hctx_idx)
{
	struct Scsi_Host *shost = set->driver_data;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);

	if (shost->hostt->exit_cmd_priv)
		shost->hostt->exit_cmd_priv(shost, cmd);
	scsi_free_sense_buffer(cmd->flags & SCMD_UNCHECKED_ISA_DMA,
			       cmd->sense_buffer);
}


static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx)
{
	struct Scsi_Host *shost = hctx->driver_data;

	if (shost->hostt->mq_poll)
		return shost->hostt->mq_poll(shost, hctx->queue_num);

	return 0;
}

static int scsi_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int hctx_idx)
{
	struct Scsi_Host *shost = data;

	hctx->driver_data = shost;
	return 0;
}

static int scsi_map_queues(struct blk_mq_tag_set *set)
{
	struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set);

	if (shost->hostt->map_queues)
		return shost->hostt->map_queues(shost);
	return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
}

void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
	struct device *dev = shost->dma_dev;

	/*
	 * this limit is imposed by hardware restrictions
	 */
	blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
					SG_MAX_SEGMENTS));

	if (scsi_host_prot_dma(shost)) {
		shost->sg_prot_tablesize =
			min_not_zero(shost->sg_prot_tablesize,
				     (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
		BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
		blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
	}

	if (dev->dma_mask) {
		shost->max_sectors = min_t(unsigned int, shost->max_sectors,
				dma_max_mapping_size(dev) >> SECTOR_SHIFT);
	}
	blk_queue_max_hw_sectors(q, shost->max_sectors);
	if (shost->unchecked_isa_dma)
		blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
	blk_queue_segment_boundary(q, shost->dma_boundary);
	dma_set_seg_boundary(dev, shost->dma_boundary);

	blk_queue_max_segment_size(q, shost->max_segment_size);
	blk_queue_virt_boundary(q, shost->virt_boundary_mask);
	dma_set_max_seg_size(dev, queue_max_segment_size(q));

	/*
	 * Set a reasonable default alignment:  The larger of 32-byte (dword),
	 * which is a common minimum for HBAs, and the minimum DMA alignment,
	 * which is set by the platform.
	 *
	 * Devices that require a bigger alignment can increase it later.
	 */
	blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
}
EXPORT_SYMBOL_GPL(__scsi_init_queue);

static const struct blk_mq_ops scsi_mq_ops_no_commit = {
	.get_budget	= scsi_mq_get_budget,
	.put_budget	= scsi_mq_put_budget,
	.queue_rq	= scsi_queue_rq,
	.complete	= scsi_complete,
	.timeout	= scsi_timeout,
#ifdef CONFIG_BLK_DEBUG_FS
	.show_rq	= scsi_show_rq,
#endif
	.init_request	= scsi_mq_init_request,
	.exit_request	= scsi_mq_exit_request,
	.initialize_rq_fn = scsi_initialize_rq,
	.cleanup_rq	= scsi_cleanup_rq,
	.busy		= scsi_mq_lld_busy,
	.map_queues	= scsi_map_queues,
	.init_hctx	= scsi_init_hctx,
	.poll		= scsi_mq_poll,
	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
};


static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct Scsi_Host *shost = hctx->driver_data;

	shost->hostt->commit_rqs(shost, hctx->queue_num);
}

static const struct blk_mq_ops scsi_mq_ops = {
	.get_budget	= scsi_mq_get_budget,
	.put_budget	= scsi_mq_put_budget,
	.queue_rq	= scsi_queue_rq,
	.commit_rqs	= scsi_commit_rqs,
	.complete	= scsi_complete,
	.timeout	= scsi_timeout,
#ifdef CONFIG_BLK_DEBUG_FS
	.show_rq	= scsi_show_rq,
#endif
	.init_request	= scsi_mq_init_request,
	.exit_request	= scsi_mq_exit_request,
	.initialize_rq_fn = scsi_initialize_rq,
	.cleanup_rq	= scsi_cleanup_rq,
	.busy		= scsi_mq_lld_busy,
	.map_queues	= scsi_map_queues,
	.init_hctx	= scsi_init_hctx,
	.poll		= scsi_mq_poll,
	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
};

struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
{
	sdev->request_queue = blk_mq_init_queue(&sdev->host->tag_set);
	if (IS_ERR(sdev->request_queue))
		return NULL;

	sdev->request_queue->queuedata = sdev;
	__scsi_init_queue(sdev->host, sdev->request_queue);
	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
	return sdev->request_queue;
}

int scsi_mq_setup_tags(struct Scsi_Host *shost)
{
	unsigned int cmd_size, sgl_size;
	struct blk_mq_tag_set *tag_set = &shost->tag_set;

	sgl_size = max_t(unsigned int, sizeof(struct scatterlist),
				scsi_mq_inline_sgl_size(shost));
	cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
	if (scsi_host_get_prot(shost))
		cmd_size += sizeof(struct scsi_data_buffer) +
			sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT;

	memset(tag_set, 0, sizeof(*tag_set));
	if (shost->hostt->commit_rqs)
		tag_set->ops = &scsi_mq_ops;
	else
		tag_set->ops = &scsi_mq_ops_no_commit;
	tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1;
	tag_set->nr_maps = shost->nr_maps ? : 1;
	tag_set->queue_depth = shost->can_queue;
	tag_set->cmd_size = cmd_size;
	tag_set->numa_node = NUMA_NO_NODE;
	tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
	tag_set->flags |=
		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
	tag_set->driver_data = shost;
	if (shost->host_tagset)
		tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;

	return blk_mq_alloc_tag_set(tag_set);
}

void scsi_mq_destroy_tags(struct Scsi_Host *shost)
{
	blk_mq_free_tag_set(&shost->tag_set);
}

/**
 * scsi_device_from_queue - return sdev associated with a request_queue
 * @q: The request queue to return the sdev from
 *
 * Return the sdev associated with a request queue or NULL if the
 * request_queue does not reference a SCSI device.
 */
struct scsi_device *scsi_device_from_queue(struct request_queue *q)
{
	struct scsi_device *sdev = NULL;

	if (q->mq_ops == &scsi_mq_ops_no_commit ||
	    q->mq_ops == &scsi_mq_ops)
		sdev = q->queuedata;
	if (!sdev || !get_device(&sdev->sdev_gendev))
		sdev = NULL;

	return sdev;
}
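
/*
 * Example (illustrative): a caller that only holds a request_queue pointer
 * "q" can map it back to its scsi_device; the reference taken by
 * scsi_device_from_queue() must be dropped again:
 *
 *	struct scsi_device *sdev = scsi_device_from_queue(q);
 *
 *	if (sdev) {
 *		sdev_printk(KERN_INFO, sdev, "queue belongs to this LUN\n");
 *		put_device(&sdev->sdev_gendev);
 *	}
 */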

/**
 * scsi_block_requests - Utility function used by low-level drivers to prevent
 * further commands from being queued to the device.
 * @shost:  host in question
 *
 * There is no timer nor any other means by which the requests get unblocked
 * other than the low-level driver calling scsi_unblock_requests().
 */
void scsi_block_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 1;
}
EXPORT_SYMBOL(scsi_block_requests);

/**
 * scsi_unblock_requests - Utility function used by low-level drivers to allow
 * further commands to be queued to the device.
 * @shost:  host in question
 *
 * There is no timer nor any other means by which the requests get unblocked
 * other than the low-level driver calling scsi_unblock_requests(). This is done
 * as an API function so that changes to the internals of the scsi mid-layer
 * won't require wholesale changes to drivers that use this feature.
 */
void scsi_unblock_requests(struct Scsi_Host *shost)
{
	shost->host_self_blocked = 0;
	scsi_run_host_queues(shost);
}
EXPORT_SYMBOL(scsi_unblock_requests);
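
/*
 * Example (illustrative): a low-level driver might bracket a controller
 * reset with this pair of calls; my_hba_reset() is a hypothetical driver
 * helper:
 *
 *	scsi_block_requests(shost);
 *	my_hba_reset(hba);
 *	scsi_unblock_requests(shost);
 */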

void scsi_exit_queue(void)
{
	kmem_cache_destroy(scsi_sense_cache);
	kmem_cache_destroy(scsi_sense_isadma_cache);
}

/**
 *	scsi_mode_select - issue a mode select
 *	@sdev:	SCSI device to be queried
 *	@pf:	Page format bit (1 == standard, 0 == vendor specific)
 *	@sp:	Save page bit (0 == don't save, 1 == save)
 *	@modepage: mode page being requested
 *	@buffer: request buffer (may not be smaller than eight bytes)
 *	@len:	length of request buffer.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@data: returns a structure abstracting the mode header data
 *	@sshdr: place to put sense data (or NULL if no sense to be collected).
 *		must be SCSI_SENSE_BUFFERSIZE big.
 *
 *	Returns zero if successful; negative error number or scsi
 *	status on error
 *
 */
int
scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
		 unsigned char *buffer, int len, int timeout, int retries,
		 struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
	unsigned char cmd[10];
	unsigned char *real_buffer;
	int ret;

	memset(cmd, 0, sizeof(cmd));
	cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0);

	if (sdev->use_10_for_ms) {
		if (len > 65535)
			return -EINVAL;
		real_buffer = kmalloc(8 + len, GFP_KERNEL);
		if (!real_buffer)
			return -ENOMEM;
		memcpy(real_buffer + 8, buffer, len);
		len += 8;
		real_buffer[0] = 0;
		real_buffer[1] = 0;
		real_buffer[2] = data->medium_type;
		real_buffer[3] = data->device_specific;
		real_buffer[4] = data->longlba ? 0x01 : 0;
		real_buffer[5] = 0;
		real_buffer[6] = data->block_descriptor_length >> 8;
		real_buffer[7] = data->block_descriptor_length;

		cmd[0] = MODE_SELECT_10;
		cmd[7] = len >> 8;
		cmd[8] = len;
	} else {
		if (len > 255 || data->block_descriptor_length > 255 ||
		    data->longlba)
			return -EINVAL;

		real_buffer = kmalloc(4 + len, GFP_KERNEL);
		if (!real_buffer)
			return -ENOMEM;
		memcpy(real_buffer + 4, buffer, len);
		len += 4;
		real_buffer[0] = 0;
		real_buffer[1] = data->medium_type;
		real_buffer[2] = data->device_specific;
		real_buffer[3] = data->block_descriptor_length;

		cmd[0] = MODE_SELECT;
		cmd[4] = len;
	}

	ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len,
			       sshdr, timeout, retries, NULL);
	kfree(real_buffer);
	return ret;
}
EXPORT_SYMBOL_GPL(scsi_mode_select);
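
/*
 * Example (illustrative): a ULD typically reads a mode page with
 * scsi_mode_sense(), flips the bits of interest and writes the page back.
 * Here the WCE bit (bit 2 of byte 2 of the caching page) is set; "sdev",
 * "buf", "data" and "sshdr" are assumed to come from a preceding
 * scsi_mode_sense() call, and the timeout/retry values are arbitrary:
 *
 *	unsigned char *pg = buf + data.header_length +
 *			    data.block_descriptor_length;
 *	int len = data.length - data.header_length -
 *		  data.block_descriptor_length;
 *	int ret;
 *
 *	pg[2] |= 0x04;
 *	ret = scsi_mode_select(sdev, 1, 0, pg[0] & 0x3f, pg, len,
 *			       30 * HZ, 3, &data, &sshdr);
 */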

/**
 *	scsi_mode_sense - issue a MODE SENSE, falling back from a ten byte to a six byte command if necessary.
 *	@sdev:	SCSI device to be queried
 *	@dbd:	set if mode sense will allow block descriptors to be returned
 *	@modepage: mode page being requested
 *	@buffer: request buffer (may not be smaller than eight bytes)
 *	@len:	length of request buffer.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@data: returns a structure abstracting the mode header data
 *	@sshdr: place to put sense data (or NULL if no sense to be collected).
 *		must be SCSI_SENSE_BUFFERSIZE big.
 *
 *	Returns the SCSI result code: zero if successful, non-zero on
 *	failure.  On success the mode data header length (either 4 or 8,
 *	depending on whether a six or ten byte command was issued) is
 *	reported in @data->header_length.
 */
int
scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
		  unsigned char *buffer, int len, int timeout, int retries,
		  struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
	unsigned char cmd[12];
	int use_10_for_ms;
	int header_length;
	int result, retry_count = retries;
	struct scsi_sense_hdr my_sshdr;

	memset(data, 0, sizeof(*data));
	memset(&cmd[0], 0, 12);

	dbd = sdev->set_dbd_for_ms ? 8 : dbd;
	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
	cmd[2] = modepage;

	/* caller might not be interested in sense, but we need it */
	if (!sshdr)
		sshdr = &my_sshdr;

 retry:
	use_10_for_ms = sdev->use_10_for_ms;

	if (use_10_for_ms) {
		if (len < 8)
			len = 8;

		cmd[0] = MODE_SENSE_10;
		cmd[8] = len;
		header_length = 8;
	} else {
		if (len < 4)
			len = 4;

		cmd[0] = MODE_SENSE;
		cmd[4] = len;
		header_length = 4;
	}

	memset(buffer, 0, len);

	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
				  sshdr, timeout, retries, NULL);

	/* This code looks awful: what it's doing is making sure an
	 * ILLEGAL REQUEST sense return identifies the actual command
	 * byte as the problem.  MODE_SENSE commands can return
	 * ILLEGAL REQUEST if the code page isn't supported */

	if (use_10_for_ms && !scsi_status_is_good(result) &&
	    driver_byte(result) == DRIVER_SENSE) {
		if (scsi_sense_valid(sshdr)) {
			if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
			    (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
				/*
				 * Invalid command operation code
				 */
				sdev->use_10_for_ms = 0;
				goto retry;
			}
		}
	}

	if (scsi_status_is_good(result)) {
		if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b &&
			     (modepage == 6 || modepage == 8))) {
			/* Initio breakage? */
			header_length = 0;
			data->length = 13;
			data->medium_type = 0;
			data->device_specific = 0;
			data->longlba = 0;
			data->block_descriptor_length = 0;
		} else if (use_10_for_ms) {
			data->length = buffer[0]*256 + buffer[1] + 2;
			data->medium_type = buffer[2];
			data->device_specific = buffer[3];
			data->longlba = buffer[4] & 0x01;
			data->block_descriptor_length = buffer[6]*256
				+ buffer[7];
		} else {
			data->length = buffer[0] + 1;
			data->medium_type = buffer[1];
			data->device_specific = buffer[2];
			data->block_descriptor_length = buffer[3];
		}
		data->header_length = header_length;
	} else if ((status_byte(result) == CHECK_CONDITION) &&
		   scsi_sense_valid(sshdr) &&
		   sshdr->sense_key == UNIT_ATTENTION && retry_count) {
		retry_count--;
		goto retry;
	}

	return result;
}
EXPORT_SYMBOL(scsi_mode_sense);
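
/*
 * Example (illustrative): reading the caching mode page (0x08); "sdev" is
 * assumed to be a device the caller holds, and the timeout/retry values
 * are arbitrary:
 *
 *	struct scsi_mode_data data;
 *	struct scsi_sense_hdr sshdr;
 *	unsigned char buf[192];
 *	int res;
 *
 *	res = scsi_mode_sense(sdev, 0, 0x08, buf, sizeof(buf), 30 * HZ, 3,
 *			      &data, &sshdr);
 *	if (scsi_status_is_good(res)) {
 *		unsigned char *pg = buf + data.header_length +
 *				    data.block_descriptor_length;
 *		bool wce = pg[2] & 0x04;
 *	}
 */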

/**
 *	scsi_test_unit_ready - test if unit is ready
 *	@sdev:	scsi device to change the state of.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@sshdr: output pointer for decoded sense information.
 *
 *	Returns zero if successful, or an error if the TUR failed.  For
 *	removable media, UNIT_ATTENTION sets ->changed flag.
 **/
int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
		     struct scsi_sense_hdr *sshdr)
{
	char cmd[] = {
		TEST_UNIT_READY, 0, 0, 0, 0, 0,
	};
	int result;

	/* try to eat the UNIT_ATTENTION if there are enough retries */
	do {
		result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr,
					  timeout, 1, NULL);
		if (sdev->removable && scsi_sense_valid(sshdr) &&
		    sshdr->sense_key == UNIT_ATTENTION)
			sdev->changed = 1;
	} while (scsi_sense_valid(sshdr) &&
		 sshdr->sense_key == UNIT_ATTENTION && --retries);

	return result;
}
EXPORT_SYMBOL(scsi_test_unit_ready);
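
/*
 * Example (illustrative): checking whether a device is ready before
 * starting an operation; "sdev" is assumed to be held by the caller:
 *
 *	struct scsi_sense_hdr sshdr;
 *	bool ready;
 *
 *	ready = scsi_test_unit_ready(sdev, 30 * HZ, 3, &sshdr) == 0;
 */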

/**
 *	scsi_device_set_state - Take the given device through the device state model.
 *	@sdev:	scsi device to change the state of.
 *	@state:	state to change to.
 *
 *	Returns zero if successful or an error if the requested
 *	transition is illegal.
 */
int
scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
{
	enum scsi_device_state oldstate = sdev->sdev_state;

	if (state == oldstate)
		return 0;

	switch (state) {
	case SDEV_CREATED:
		switch (oldstate) {
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_RUNNING:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_QUIESCE:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_OFFLINE:
	case SDEV_TRANSPORT_OFFLINE:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_BLOCK:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_CREATED_BLOCK:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CREATED_BLOCK:
		switch (oldstate) {
		case SDEV_CREATED:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CANCEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_DEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_CANCEL:
		case SDEV_BLOCK:
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	}
	sdev->offline_already = false;
	sdev->sdev_state = state;
	return 0;

 illegal:
	SCSI_LOG_ERROR_RECOVERY(1,
				sdev_printk(KERN_ERR, sdev,
					    "Illegal state transition %s->%s",
					    scsi_device_state_name(oldstate),
					    scsi_device_state_name(state))
				);
	return -EINVAL;
}
EXPORT_SYMBOL(scsi_device_set_state);

/**
 *	scsi_evt_emit - emit a single SCSI device uevent
 *	@sdev: associated SCSI device
 *	@evt: event to emit
 *
 *	Send a single uevent (scsi_event) to the associated scsi_device.
 */
static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
{
	int idx = 0;
	char *envp[3];

	switch (evt->evt_type) {
	case SDEV_EVT_MEDIA_CHANGE:
		envp[idx++] = "SDEV_MEDIA_CHANGE=1";
		break;
	case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
		scsi_rescan_device(&sdev->sdev_gendev);
		envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=CAPACITY_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
	       envp[idx++] = "SDEV_UA=THIN_PROVISIONING_SOFT_THRESHOLD_REACHED";
		break;
	case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=MODE_PARAMETERS_CHANGED";
		break;
	case SDEV_EVT_LUN_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=REPORTED_LUNS_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=ASYMMETRIC_ACCESS_STATE_CHANGED";
		break;
	case SDEV_EVT_POWER_ON_RESET_OCCURRED:
		envp[idx++] = "SDEV_UA=POWER_ON_RESET_OCCURRED";
		break;
	default:
		/* do nothing */
		break;
	}

	envp[idx++] = NULL;

	kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp);
}

/**
 *	scsi_evt_thread - send a uevent for each scsi event
 *	@work: work struct for scsi_device
 *
 *	Dispatch queued events to their associated scsi_device kobjects
 *	as uevents.
 */
void scsi_evt_thread(struct work_struct *work)
{
	struct scsi_device *sdev;
	enum scsi_device_event evt_type;
	LIST_HEAD(event_list);

	sdev = container_of(work, struct scsi_device, event_work);

	for (evt_type = SDEV_EVT_FIRST; evt_type <= SDEV_EVT_LAST; evt_type++)
		if (test_and_clear_bit(evt_type, sdev->pending_events))
			sdev_evt_send_simple(sdev, evt_type, GFP_KERNEL);

	while (1) {
		struct scsi_event *evt;
		struct list_head *this, *tmp;
		unsigned long flags;

		spin_lock_irqsave(&sdev->list_lock, flags);
		list_splice_init(&sdev->event_list, &event_list);
		spin_unlock_irqrestore(&sdev->list_lock, flags);

		if (list_empty(&event_list))
			break;

		list_for_each_safe(this, tmp, &event_list) {
			evt = list_entry(this, struct scsi_event, node);
			list_del(&evt->node);
			scsi_evt_emit(sdev, evt);
			kfree(evt);
		}
	}
}

/**
 * 	sdev_evt_send - send asserted event to uevent thread
 *	@sdev: scsi_device event occurred on
 *	@evt: event to send
 *
 *	Assert scsi device event asynchronously.
 */
void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt)
{
	unsigned long flags;

#if 0
	/* FIXME: currently this check eliminates all media change events
	 * for polled devices.  Need to update to discriminate between AN
	 * and polled events */
	if (!test_bit(evt->evt_type, sdev->supported_events)) {
		kfree(evt);
		return;
	}
#endif

	spin_lock_irqsave(&sdev->list_lock, flags);
	list_add_tail(&evt->node, &sdev->event_list);
	schedule_work(&sdev->event_work);
	spin_unlock_irqrestore(&sdev->list_lock, flags);
}
EXPORT_SYMBOL_GPL(sdev_evt_send);

/**
 * 	sdev_evt_alloc - allocate a new scsi event
 *	@evt_type: type of event to allocate
 *	@gfpflags: GFP flags for allocation
 *
 *	Allocates and returns a new scsi_event.
 */
struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
				  gfp_t gfpflags)
{
	struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags);
	if (!evt)
		return NULL;

	evt->evt_type = evt_type;
	INIT_LIST_HEAD(&evt->node);

	/* evt_type-specific initialization, if any */
	switch (evt_type) {
	case SDEV_EVT_MEDIA_CHANGE:
	case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
	case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
	case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
	case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
	case SDEV_EVT_LUN_CHANGE_REPORTED:
	case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
	case SDEV_EVT_POWER_ON_RESET_OCCURRED:
	default:
		/* do nothing */
		break;
	}

	return evt;
}
EXPORT_SYMBOL_GPL(sdev_evt_alloc);

/**
 * 	sdev_evt_send_simple - send asserted event to uevent thread
 *	@sdev: scsi_device event occurred on
 *	@evt_type: type of event to send
 *	@gfpflags: GFP flags for allocation
 *
 *	Assert scsi device event asynchronously, given an event type.
 */
void sdev_evt_send_simple(struct scsi_device *sdev,
			  enum scsi_device_event evt_type, gfp_t gfpflags)
{
	struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags);
	if (!evt) {
		sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n",
			    evt_type);
		return;
	}

	sdev_evt_send(sdev, evt);
}
EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
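
/*
 * Example (illustrative): a driver that learns of a media change from an
 * asynchronous notification could emit the corresponding uevent with:
 *
 *	sdev_evt_send_simple(sdev, SDEV_EVT_MEDIA_CHANGE, GFP_ATOMIC);
 */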

/**
 *	scsi_device_quiesce - Block all commands except power management.
 *	@sdev:	scsi device to quiesce.
 *
 *	This works by trying to transition to the SDEV_QUIESCE state
 *	(which must be a legal transition).  When the device is in this
 *	state, only power management requests will be accepted, all others will
 *	be deferred.
 *
 *	Must be called with user context, may sleep.
 *
 *	Returns zero if successful or an error if not.
 */
int
scsi_device_quiesce(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;
	int err;

	/*
	 * It is allowed to call scsi_device_quiesce() multiple times from
	 * the same context but concurrent scsi_device_quiesce() calls are
	 * not allowed.
	 */
	WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);

	if (sdev->quiesced_by == current)
		return 0;

	blk_set_pm_only(q);

	blk_mq_freeze_queue(q);
	/*
	 * Ensure that the effect of blk_set_pm_only() will be visible
	 * for percpu_ref_tryget() callers that occur after the queue
	 * unfreeze even if the queue was already frozen before this function
	 * was called. See also https://lwn.net/Articles/573497/.
	 */
	synchronize_rcu();
	blk_mq_unfreeze_queue(q);

	mutex_lock(&sdev->state_mutex);
	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err == 0)
		sdev->quiesced_by = current;
	else
2623
		blk_clear_pm_only(q);
2624 2625
	mutex_unlock(&sdev->state_mutex);

2626
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637
}
EXPORT_SYMBOL(scsi_device_quiesce);

/**
 *	scsi_device_resume - Restart user issued commands to a quiesced device.
 *	@sdev:	scsi device to resume.
 *
 *	Moves the device from quiesced back to running and restarts the
 *	queues.
 *
 *	Must be called with user context, may sleep.
2638
 */
2639
void scsi_device_resume(struct scsi_device *sdev)
Linus Torvalds's avatar
Linus Torvalds committed
2640
{
2641 2642 2643 2644
	/* check if the device state was mutated prior to resume, and if
	 * so assume the state is being managed elsewhere (for example
	 * device deleted during suspend)
	 */
	mutex_lock(&sdev->state_mutex);
	if (sdev->sdev_state == SDEV_QUIESCE)
		scsi_device_set_state(sdev, SDEV_RUNNING);
	if (sdev->quiesced_by) {
		sdev->quiesced_by = NULL;
		blk_clear_pm_only(sdev->request_queue);
	}
	mutex_unlock(&sdev->state_mutex);
}
EXPORT_SYMBOL(scsi_device_resume);
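
/*
 * Example (illustrative): the usual power-management pairing; "sdev" is
 * assumed to be held by the caller, issue_pm_requests() is a hypothetical
 * helper and the code runs in process context:
 *
 *	if (scsi_device_quiesce(sdev) == 0) {
 *		issue_pm_requests(sdev);
 *		scsi_device_resume(sdev);
 *	}
 */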

static void
device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}

void
scsi_target_quiesce(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_quiesce_fn);
}
EXPORT_SYMBOL(scsi_target_quiesce);

static void
device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}

void
scsi_target_resume(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_resume_fn);
}
EXPORT_SYMBOL(scsi_target_resume);

/**
 * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
 * @sdev: device to block
 *
 * Pause SCSI command processing on the specified device. Does not sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Notes:
 * This routine transitions the device to the SDEV_BLOCK state (which must be
 * a legal transition). When the device is in this state, command processing
 * is paused until the device leaves the SDEV_BLOCK state. See also
 * scsi_internal_device_unblock_nowait().
2695
 */
2696
int scsi_internal_device_block_nowait(struct scsi_device *sdev)
Linus Torvalds's avatar
Linus Torvalds committed
2697
{
2698
	struct request_queue *q = sdev->request_queue;
Linus Torvalds's avatar
Linus Torvalds committed
2699 2700 2701
	int err = 0;

	err = scsi_device_set_state(sdev, SDEV_BLOCK);
	if (err) {
		err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);

		if (err)
			return err;
	}

	/*
	 * The device has transitioned to SDEV_BLOCK.  Stop the
	 * block layer from calling the midlayer with this device's
	 * request queue.
	 */
	blk_mq_quiesce_queue_nowait(q);
	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);

/**
 * scsi_internal_device_block - try to transition to the SDEV_BLOCK state
 * @sdev: device to block
 *
 * Pause SCSI command processing on the specified device and wait until all
 * ongoing scsi_request_fn() / scsi_queue_rq() calls have finished. May sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Note:
 * This routine transitions the device to the SDEV_BLOCK state (which must be
 * a legal transition). When the device is in this state, command processing
 * is paused until the device leaves the SDEV_BLOCK state. See also
 * scsi_internal_device_unblock().
 */
static int scsi_internal_device_block(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;
	int err;

	mutex_lock(&sdev->state_mutex);
	err = scsi_internal_device_block_nowait(sdev);
	if (err == 0)
		blk_mq_quiesce_queue(q);
	mutex_unlock(&sdev->state_mutex);

	return err;
}

void scsi_start_queue(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;

	blk_mq_unquiesce_queue(q);
}

Linus Torvalds's avatar
Linus Torvalds committed
2755
/**
2756
 * scsi_internal_device_unblock_nowait - resume a device after a block request
Linus Torvalds's avatar
Linus Torvalds committed
2757
 * @sdev:	device to resume
2758
 * @new_state:	state to set the device to after unblocking
Linus Torvalds's avatar
Linus Torvalds committed
2759
 *
2760 2761
 * Restart the device queue for a previously suspended SCSI device. Does not
 * sleep.
Linus Torvalds's avatar
Linus Torvalds committed
2762
 *
2763
 * Returns zero if successful or a negative error code upon failure.
Linus Torvalds's avatar
Linus Torvalds committed
2764
 *
2765 2766 2767 2768
 * Notes:
 * This routine transitions the device to the SDEV_RUNNING state or to one of
 * the offline states (which must be a legal transition) allowing the midlayer
 * to goose the queue for this device.
2769
 */
2770 2771
int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
					enum scsi_device_state new_state)
Linus Torvalds's avatar
Linus Torvalds committed
2772
{
2773 2774 2775 2776 2777 2778 2779 2780
	switch (new_state) {
	case SDEV_RUNNING:
	case SDEV_TRANSPORT_OFFLINE:
		break;
	default:
		return -EINVAL;
	}

	/*
	 * Try to transition the scsi device to SDEV_RUNNING or one of the
	 * offlined states and goose the device queue if successful.
	 */
	switch (sdev->sdev_state) {
	case SDEV_BLOCK:
	case SDEV_TRANSPORT_OFFLINE:
		sdev->sdev_state = new_state;
		break;
	case SDEV_CREATED_BLOCK:
		if (new_state == SDEV_TRANSPORT_OFFLINE ||
		    new_state == SDEV_OFFLINE)
			sdev->sdev_state = new_state;
		else
			sdev->sdev_state = SDEV_CREATED;
		break;
	case SDEV_CANCEL:
	case SDEV_OFFLINE:
		break;
	default:
		return -EINVAL;
	}
	scsi_start_queue(sdev);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);

/**
 * scsi_internal_device_unblock - resume a device after a block request
 * @sdev:	device to resume
 * @new_state:	state to set the device to after unblocking
 *
 * Restart the device queue for a previously suspended SCSI device. May sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Notes:
 * This routine transitions the device to the SDEV_RUNNING state or to one of
 * the offline states (which must be a legal transition) allowing the midlayer
 * to goose the queue for this device.
 */
static int scsi_internal_device_unblock(struct scsi_device *sdev,
					enum scsi_device_state new_state)
{
	int ret;

	mutex_lock(&sdev->state_mutex);
	ret = scsi_internal_device_unblock_nowait(sdev, new_state);
	mutex_unlock(&sdev->state_mutex);

	return ret;
}

static void
device_block(struct scsi_device *sdev, void *data)
{
	int ret;

	ret = scsi_internal_device_block(sdev);

	WARN_ONCE(ret, "scsi_internal_device_block(%s) failed: ret = %d\n",
		  dev_name(&sdev->sdev_gendev), ret);
}

static int
target_block(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	return 0;
}

void
scsi_target_block(struct device *dev)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					device_block);
	else
		device_for_each_child(dev, NULL, target_block);
}
EXPORT_SYMBOL_GPL(scsi_target_block);

static void
device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev, *(enum scsi_device_state *)data);
}

static int
target_unblock(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), data,
					device_unblock);
	return 0;
}

void
scsi_target_unblock(struct device *dev, enum scsi_device_state new_state)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), &new_state,
					device_unblock);
	else
		device_for_each_child(dev, &new_state, target_unblock);
}
EXPORT_SYMBOL_GPL(scsi_target_unblock);
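
/*
 * Example (illustrative): transport classes typically block a target while
 * its link is down and unblock it when the link returns;
 * wait_for_link_recovery() stands in for the transport's own logic:
 *
 *	scsi_target_block(&starget->dev);
 *	wait_for_link_recovery();
 *	scsi_target_unblock(&starget->dev, SDEV_RUNNING);
 */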

int
scsi_host_block(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;
	int ret = 0;

	/*
	 * Call scsi_internal_device_block_nowait so we can avoid
	 * calling synchronize_rcu() for each LUN.
	 */
	shost_for_each_device(sdev, shost) {
		mutex_lock(&sdev->state_mutex);
		ret = scsi_internal_device_block_nowait(sdev);
		mutex_unlock(&sdev->state_mutex);
		if (ret) {
			scsi_device_put(sdev);
			break;
		}
	}

	/*
	 * SCSI never enables blk-mq's BLK_MQ_F_BLOCKING flag so
	 * calling synchronize_rcu() once is enough.
	 */
	WARN_ON_ONCE(shost->tag_set.flags & BLK_MQ_F_BLOCKING);

	if (!ret)
		synchronize_rcu();

	return ret;
}
EXPORT_SYMBOL_GPL(scsi_host_block);

int
scsi_host_unblock(struct Scsi_Host *shost, int new_state)
{
	struct scsi_device *sdev;
	int ret = 0;

	shost_for_each_device(sdev, shost) {
		ret = scsi_internal_device_unblock(sdev, new_state);
		if (ret) {
			scsi_device_put(sdev);
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(scsi_host_unblock);
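
/*
 * Example (illustrative): blocking every LUN on a host around a host-wide
 * recovery action and resuming them afterwards; do_host_recovery() is a
 * hypothetical driver-specific helper:
 *
 *	if (scsi_host_block(shost) == 0) {
 *		do_host_recovery(shost);
 *		scsi_host_unblock(shost, SDEV_RUNNING);
 *	}
 */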

/**
 * scsi_kmap_atomic_sg - find and atomically map an sg-element
 * @sgl:	scatter-gather list
 * @sg_count:	number of segments in sg
 * @offset:	offset in bytes into sg, on return offset into the mapped area
 * @len:	bytes to map, on return number of bytes mapped
 *
 * Returns virtual address of the start of the mapped page
 */
void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
			  size_t *offset, size_t *len)
{
	int i;
	size_t sg_len = 0, len_complete = 0;
	struct scatterlist *sg;
	struct page *page;

	WARN_ON(!irqs_disabled());

	for_each_sg(sgl, sg, sg_count, i) {
		len_complete = sg_len; /* Complete sg-entries */
		sg_len += sg->length;
		if (sg_len > *offset)
			break;
	}

	if (unlikely(i == sg_count)) {
		printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, "
			"elements %d\n",
		       __func__, sg_len, *offset, sg_count);
		WARN_ON(1);
		return NULL;
	}

	/* Offset starting from the beginning of first page in this sg-entry */
	*offset = *offset - len_complete + sg->offset;

	/* Assumption: contiguous pages can be accessed as "page + i" */
	page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
	*offset &= ~PAGE_MASK;

	/* Bytes in this sg-entry from *offset to the end of the page */
	sg_len = PAGE_SIZE - *offset;
	if (*len > sg_len)
		*len = sg_len;

	return kmap_atomic(page);
}
EXPORT_SYMBOL(scsi_kmap_atomic_sg);

/**
 * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg
 * @virt:	virtual address to be unmapped
 */
void scsi_kunmap_atomic_sg(void *virt)
{
	kunmap_atomic(virt);
}
EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
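
/*
 * Example (illustrative): copying a few bytes out of a command's data
 * buffer with interrupts disabled; "cmd", "dst", "byte_off" and "want"
 * are assumptions of this sketch:
 *
 *	size_t off = byte_off, len = want;
 *	char *p;
 *
 *	p = scsi_kmap_atomic_sg(scsi_sglist(cmd), scsi_sg_count(cmd),
 *				&off, &len);
 *	if (p) {
 *		memcpy(dst, p + off, len);
 *		scsi_kunmap_atomic_sg(p);
 *	}
 */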

void sdev_disable_disk_events(struct scsi_device *sdev)
{
	atomic_inc(&sdev->disk_events_disable_depth);
}
EXPORT_SYMBOL(sdev_disable_disk_events);

void sdev_enable_disk_events(struct scsi_device *sdev)
{
	if (WARN_ON_ONCE(atomic_read(&sdev->disk_events_disable_depth) <= 0))
		return;
	atomic_dec(&sdev->disk_events_disable_depth);
}
EXPORT_SYMBOL(sdev_enable_disk_events);

static unsigned char designator_prio(const unsigned char *d)
{
	if (d[1] & 0x30)
		/* not associated with LUN */
		return 0;

	if (d[3] == 0)
		/* invalid length */
		return 0;

	/*
	 * Order of preference for lun descriptor:
	 * - SCSI name string
	 * - NAA IEEE Registered Extended
	 * - EUI-64 based 16-byte
	 * - EUI-64 based 12-byte
	 * - NAA IEEE Registered
	 * - NAA IEEE Extended
	 * - EUI-64 based 8-byte
	 * - SCSI name string (truncated)
	 * - T10 Vendor ID
	 * as longer descriptors reduce the likelihood
	 * of identification clashes.
	 */

	switch (d[1] & 0xf) {
	case 8:
		/* SCSI name string, variable-length UTF-8 */
		return 9;
	case 3:
		switch (d[4] >> 4) {
		case 6:
			/* NAA registered extended */
			return 8;
		case 5:
			/* NAA registered */
			return 5;
		case 4:
			/* NAA extended */
			return 4;
		case 3:
			/* NAA locally assigned */
			return 1;
		default:
			break;
		}
		break;
	case 2:
		switch (d[3]) {
		case 16:
			/* EUI64-based, 16 byte */
			return 7;
		case 12:
			/* EUI64-based, 12 byte */
			return 6;
		case 8:
			/* EUI64-based, 8 byte */
			return 3;
		default:
			break;
		}
		break;
	case 1:
		/* T10 vendor ID */
		return 1;
	default:
		break;
	}

	return 0;
}

/**
 * scsi_vpd_lun_id - return a unique device identification
 * @sdev: SCSI device
 * @id:   buffer for the identification
 * @id_len:  length of the buffer
 *
 * Copies a unique device identification into @id based
 * on the information in the VPD page 0x83 of the device.
 * The string will be formatted as a SCSI name string.
 *
 * Returns the length of the identification or error on failure.
 * If the identifier is longer than the supplied buffer the actual
 * identifier length is returned and the buffer is not zero-padded.
 */
int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len)
{
	u8 cur_id_prio = 0;
	u8 cur_id_size = 0;
	const unsigned char *d, *cur_id_str;
	const struct scsi_vpd *vpd_pg83;
	int id_size = -EINVAL;

	rcu_read_lock();
	vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
	if (!vpd_pg83) {
		rcu_read_unlock();
		return -ENXIO;
	}

	/* The id string must be at least 20 bytes + terminating NULL byte */
	if (id_len < 21) {
		rcu_read_unlock();
		return -EINVAL;
	}

	memset(id, 0, id_len);
	for (d = vpd_pg83->data + 4;
	     d < vpd_pg83->data + vpd_pg83->len;
	     d += d[3] + 4) {
		u8 prio = designator_prio(d);

		if (prio == 0 || cur_id_prio > prio)
			continue;

		switch (d[1] & 0xf) {
		case 0x1:
			/* T10 Vendor ID */
			if (cur_id_size > d[3])
				break;
			cur_id_prio = prio;
			cur_id_size = d[3];
			if (cur_id_size + 4 > id_len)
				cur_id_size = id_len - 4;
			cur_id_str = d + 4;
			id_size = snprintf(id, id_len, "t10.%*pE",
					   cur_id_size, cur_id_str);
			break;
		case 0x2:
			/* EUI-64 */
			cur_id_prio = prio;
			cur_id_size = d[3];
			cur_id_str = d + 4;
			switch (cur_id_size) {
			case 8:
				id_size = snprintf(id, id_len,
						   "eui.%8phN",
						   cur_id_str);
				break;
			case 12:
				id_size = snprintf(id, id_len,
						   "eui.%12phN",
						   cur_id_str);
				break;
			case 16:
				id_size = snprintf(id, id_len,
						   "eui.%16phN",
						   cur_id_str);
				break;
			default:
				break;
			}
			break;
		case 0x3:
			/* NAA */
			cur_id_prio = prio;
			cur_id_size = d[3];
			cur_id_str = d + 4;
			switch (cur_id_size) {
			case 8:
				id_size = snprintf(id, id_len,
						   "naa.%8phN",
						   cur_id_str);
				break;
			case 16:
				id_size = snprintf(id, id_len,
						   "naa.%16phN",
						   cur_id_str);
				break;
			default:
				break;
			}
			break;
		case 0x8:
			/* SCSI name string */
			if (cur_id_size > d[3])
				break;
			/* Prefer others for truncated descriptor */
			if (d[3] > id_len) {
				prio = 2;
				if (cur_id_prio > prio)
					break;
			}
			cur_id_prio = prio;
			cur_id_size = id_size = d[3];
			cur_id_str = d + 4;
			if (cur_id_size >= id_len)
				cur_id_size = id_len - 1;
			memcpy(id, cur_id_str, cur_id_size);
			break;
		default:
			break;
		}
	}
	rcu_read_unlock();

	return id_size;
}
EXPORT_SYMBOL(scsi_vpd_lun_id);
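
/*
 * Example (illustrative): fetching a persistent name for a LUN; 64 bytes
 * is comfortably above the documented 21-byte minimum:
 *
 *	char wwid[64];
 *	int len = scsi_vpd_lun_id(sdev, wwid, sizeof(wwid));
 *
 *	if (len > 0 && len < sizeof(wwid))
 *		pr_info("lun id: %s\n", wwid);
 */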

/*
 * scsi_vpd_tpg_id - return a target port group identifier
 * @sdev: SCSI device
 *
 * Returns the Target Port Group identifier from the information
 * in VPD page 0x83 of the device.
 *
 * Returns the identifier or error on failure.
 */
int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id)
{
	const unsigned char *d;
	const struct scsi_vpd *vpd_pg83;
	int group_id = -EAGAIN, rel_port = -1;

	rcu_read_lock();
	vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
	if (!vpd_pg83) {
		rcu_read_unlock();
		return -ENXIO;
	}

	d = vpd_pg83->data + 4;
	while (d < vpd_pg83->data + vpd_pg83->len) {
		switch (d[1] & 0xf) {
		case 0x4:
			/* Relative target port */
			rel_port = get_unaligned_be16(&d[6]);
			break;
		case 0x5:
			/* Target port group */
			group_id = get_unaligned_be16(&d[6]);
			break;
		default:
			break;
		}
		d += d[3] + 4;
	}
	rcu_read_unlock();

	if (group_id >= 0 && rel_id && rel_port != -1)
		*rel_id = rel_port;

	return group_id;
}
EXPORT_SYMBOL(scsi_vpd_tpg_id);
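
/*
 * Example (illustrative): ALUA-aware code querying the target port group
 * of a device:
 *
 *	int rel_port = -1, group;
 *
 *	group = scsi_vpd_tpg_id(sdev, &rel_port);
 *	if (group >= 0)
 *		pr_debug("tpg %d, relative port %d\n", group, rel_port);
 */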