Commit e26feff6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
  doc/cdrom: Trvial documentation error, file not present
  block_dev: fix kernel-doc in new functions
  block: add some comments around the bio read-write flags
  block: mark bio_split_pool static
  block: Find bio sector offset given idx and offset
  block: gendisk integrity wrapper
  block: Switch blk_integrity_compare from bdev to gendisk
  block: Fix double put in blk_integrity_unregister
  block: Introduce integrity data ownership flag
  block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
  bio.h: Remove unused conditional code
  block: remove end_{queued|dequeued}_request()
  block: change elevator to use __blk_end_request()
  gdrom: change to use __blk_end_request()
  memstick: change to use __blk_end_request()
  virtio_blk: change to use __blk_end_request()
  blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
  block: add lld busy state exporting interface
  block: Fix blk_start_queueing() to not kick a stopped queue
  include blktrace_api.h in headers_install
  ...
parents d403a648 b911e473
......@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this:
int i, count = dma_map_sg(dev, sglist, nents, direction);
struct scatterlist *sg;
for (i = 0, sg = sglist; i < count; i++, sg++) {
for_each_sg(sglist, sg, count, i) {
hw_address[i] = sg_dma_address(sg);
hw_len[i] = sg_dma_len(sg);
}
......
......@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
!Eblock/blk-barrier.c
!Eblock/blk-tag.c
!Iblock/blk-tag.c
!Eblock/blk-integrity.c
!Iblock/blktrace.c
!Iblock/genhd.c
!Eblock/genhd.c
</chapter>
<chapter id="chrdev">
......
......@@ -30,12 +30,18 @@ write_expire (in ms)
Similar to read_expire mentioned above, but for writes.
fifo_batch
fifo_batch (number of requests)
----------
When a read request expires its deadline, we must move some requests from
the sorted io scheduler list to the block device dispatch queue. fifo_batch
controls how many requests we move.
Requests are grouped into ``batches'' of a particular data direction (read or
write) which are serviced in increasing sector order. To limit extra seeking,
deadline expiries are only checked between batches. fifo_batch controls the
maximum number of requests per batch.
This parameter tunes the balance between per-request latency and aggregate
throughput. When low latency is the primary concern, smaller is better (where
a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
generally improves throughput, at the cost of latency variation.
writes_starved (number of dispatches)
......
......@@ -145,8 +145,7 @@ useful for reading photocds.
To play an audio CD, you should first unmount and remove any data
CDROM. Any of the CDROM player programs should then work (workman,
workbone, cdplayer, etc.). Lacking anything else, you could use the
cdtester program in Documentation/cdrom/sbpcd.
workbone, cdplayer, etc.).
On a few drives, you can read digital audio directly using a program
such as cdda2wav. The only types of drive which I've heard support
......
......@@ -4,8 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
cmd-filter.o
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
......
......@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
del_timer(&ad->antic_timer);
ad->antic_status = ANTIC_FINISHED;
/* see as_work_handler */
kblockd_schedule_work(&ad->antic_work);
kblockd_schedule_work(ad->q, &ad->antic_work);
}
}
......@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data)
aic = ad->io_context->aic;
ad->antic_status = ANTIC_FINISHED;
kblockd_schedule_work(&ad->antic_work);
kblockd_schedule_work(q, &ad->antic_work);
if (aic->ttime_samples == 0) {
/* process anticipated on has exited or timed out*/
......@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
*/
static int as_can_anticipate(struct as_data *ad, struct request *rq)
{
#if 0 /* disable for now, we need to check tag level as well */
/*
* SSD device without seek penalty, disable idling
*/
if (blk_queue_nonrot(ad->q)) axman
return 0;
#endif
if (!ad->io_context)
/*
* Last request submitted was a write
......@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
if (ad->changed_batch && ad->nr_dispatched == 1) {
ad->current_batch_expires = jiffies +
ad->batch_expire[ad->batch_data_dir];
kblockd_schedule_work(&ad->antic_work);
kblockd_schedule_work(q, &ad->antic_work);
ad->changed_batch = 0;
if (ad->batch_data_dir == REQ_SYNC)
......
......@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
bio->bi_end_io = bio_end_empty_barrier;
bio->bi_private = &wait;
bio->bi_bdev = bdev;
submit_bio(1 << BIO_RW_BARRIER, bio);
submit_bio(WRITE_BARRIER, bio);
wait_for_completion(&wait);
......@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
static void blkdev_discard_end_io(struct bio *bio, int err)
{
if (err) {
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
bio_put(bio);
}
/**
* blkdev_issue_discard - queue a discard
* @bdev: blockdev to issue discard for
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Issue a discard request for the sectors in question. Does not wait.
*/
int blkdev_issue_discard(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
struct request_queue *q;
struct bio *bio;
int ret = 0;
if (bdev->bd_disk == NULL)
return -ENXIO;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!q->prepare_discard_fn)
return -EOPNOTSUPP;
while (nr_sects && !ret) {
bio = bio_alloc(gfp_mask, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
bio->bi_sector = sector;
if (nr_sects > q->max_hw_sectors) {
bio->bi_size = q->max_hw_sectors << 9;
nr_sects -= q->max_hw_sectors;
sector += q->max_hw_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
bio_get(bio);
submit_bio(DISCARD_BARRIER, bio);
/* Check if it failed immediately */
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
ret = -EIO;
bio_put(bio);
}
return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
This diff is collapsed.
......@@ -16,7 +16,7 @@
/**
* blk_end_sync_rq - executes a completion event on a request
* @rq: request to complete
* @error: end io status of the request
* @error: end I/O status of the request
*/
static void blk_end_sync_rq(struct request *rq, int error)
{
......@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error)
* @done: I/O completion handler
*
* Description:
* Insert a fully prepared request at the back of the io scheduler queue
* Insert a fully prepared request at the back of the I/O scheduler queue
* for execution. Don't wait for completion.
*/
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
......@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
* @at_head: insert request at head or tail of queue
*
* Description:
* Insert a fully prepared request at the back of the io scheduler queue
* Insert a fully prepared request at the back of the I/O scheduler queue
* for execution and wait for completion.
*/
int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
......
......@@ -108,51 +108,51 @@ int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
/**
* blk_integrity_compare - Compare integrity profile of two block devices
* @b1: Device to compare
* @b2: Device to compare
* blk_integrity_compare - Compare integrity profile of two disks
* @gd1: Disk to compare
* @gd2: Disk to compare
*
* Description: Meta-devices like DM and MD need to verify that all
* sub-devices use the same integrity format before advertising to
* upper layers that they can send/receive integrity metadata. This
* function can be used to check whether two block devices have
* function can be used to check whether two gendisk devices have
* compatible integrity formats.
*/
int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
{
struct blk_integrity *b1 = bd1->bd_disk->integrity;
struct blk_integrity *b2 = bd2->bd_disk->integrity;
struct blk_integrity *b1 = gd1->integrity;
struct blk_integrity *b2 = gd2->integrity;
BUG_ON(bd1->bd_disk == NULL);
BUG_ON(bd2->bd_disk == NULL);
if (!b1 && !b2)
return 0;
if (!b1 || !b2)
return 0;
return -1;
if (b1->sector_size != b2->sector_size) {
printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
gd1->disk_name, gd2->disk_name,
b1->sector_size, b2->sector_size);
return -1;
}
if (b1->tuple_size != b2->tuple_size) {
printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
gd1->disk_name, gd2->disk_name,
b1->tuple_size, b2->tuple_size);
return -1;
}
if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
gd1->disk_name, gd2->disk_name,
b1->tag_size, b2->tag_size);
return -1;
}
if (strcmp(b1->name, b2->name)) {
printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
gd1->disk_name, gd2->disk_name,
b1->name, b2->name);
return -1;
}
......@@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
return -1;
if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
&disk->dev.kobj, "%s", "integrity")) {
&disk_to_dev(disk)->kobj,
"%s", "integrity")) {
kmem_cache_free(integrity_cachep, bi);
return -1;
}
......@@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk)
kobject_uevent(&bi->kobj, KOBJ_REMOVE);
kobject_del(&bi->kobj);
kobject_put(&disk->dev.kobj);
kmem_cache_free(integrity_cachep, bi);
disk->integrity = NULL;
}
EXPORT_SYMBOL(blk_integrity_unregister);
......@@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio)
}
static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
void __user *ubuf, unsigned int len)
struct rq_map_data *map_data, void __user *ubuf,
unsigned int len, int null_mapped, gfp_t gfp_mask)
{
unsigned long uaddr;
unsigned int alignment;
struct bio *bio, *orig_bio;
int reading, ret;
......@@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
* direct dma. else, set up kernel bounce buffers
*/
uaddr = (unsigned long) ubuf;
alignment = queue_dma_alignment(q) | q->dma_pad_mask;
if (!(uaddr & alignment) && !(len & alignment))
bio = bio_map_user(q, NULL, uaddr, len, reading);
if (blk_rq_aligned(q, ubuf, len) && !map_data)
bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
else
bio = bio_copy_user(q, uaddr, len, reading);
bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
if (IS_ERR(bio))
return PTR_ERR(bio);
if (null_mapped)
bio->bi_flags |= (1 << BIO_NULL_MAPPED);
orig_bio = bio;
blk_queue_bounce(q, &bio);
......@@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
}
/**
* blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
* blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
* @rq: request structure to fill
* @map_data: pointer to the rq_map_data holding pages (if necessary)
* @ubuf: the user buffer
* @len: length of user data
* @gfp_mask: memory allocation flags
*
* Description:
* Data will be mapped directly for zero copy io, if possible. Otherwise
* Data will be mapped directly for zero copy I/O, if possible. Otherwise
* a kernel bounce buffer is used.
*
* A matching blk_rq_unmap_user() must be issued at the end of io, while
* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
* still in process context.
*
* Note: The mapped bio may need to be bounced through blk_queue_bounce()
......@@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
* unmapping.
*/
int blk_rq_map_user(struct request_queue *q, struct request *rq,
void __user *ubuf, unsigned long len)
struct rq_map_data *map_data, void __user *ubuf,
unsigned long len, gfp_t gfp_mask)
{
unsigned long bytes_read = 0;
struct bio *bio = NULL;
int ret;
int ret, null_mapped = 0;
if (len > (q->max_hw_sectors << 9))
return -EINVAL;
if (!len || !ubuf)
if (!len)
return -EINVAL;
if (!ubuf) {
if (!map_data || rq_data_dir(rq) != READ)
return -EINVAL;
null_mapped = 1;
}
while (bytes_read != len) {
unsigned long map_len, end, start;
......@@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
if (end - start > BIO_MAX_PAGES)
map_len -= PAGE_SIZE;
ret = __blk_rq_map_user(q, rq, ubuf, map_len);
ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
null_mapped, gfp_mask);
if (ret < 0)
goto unmap_rq;
if (!bio)
......@@ -154,18 +165,20 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
EXPORT_SYMBOL(blk_rq_map_user);
/**
* blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
* @rq: request to map data to
* @map_data: pointer to the rq_map_data holding pages (if necessary)
* @iov: pointer to the iovec
* @iov_count: number of elements in the iovec
* @len: I/O byte count
* @gfp_mask: memory allocation flags
*
* Description:
* Data will be mapped directly for zero copy io, if possible. Otherwise
* Data will be mapped directly for zero copy I/O, if possible. Otherwise
* a kernel bounce buffer is used.
*
* A matching blk_rq_unmap_user() must be issued at the end of io, while
* A matching blk_rq_unmap_user() must be issued at the end of I/O, while
* still in process context.
*
* Note: The mapped bio may need to be bounced through blk_queue_bounce()
......@@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user);
* unmapping.
*/
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct sg_iovec *iov, int iov_count, unsigned int len)
struct rq_map_data *map_data, struct sg_iovec *iov,
int iov_count, unsigned int len, gfp_t gfp_mask)
{
struct bio *bio;
int i, read = rq_data_dir(rq) == READ;
......@@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
}
}
if (unaligned || (q->dma_pad_mask & len))
bio = bio_copy_user_iov(q, iov, iov_count, read);
if (unaligned || (q->dma_pad_mask & len) || map_data)
bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
gfp_mask);
else
bio = bio_map_user_iov(q, NULL, iov, iov_count, read);
bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
if (IS_ERR(bio))
return PTR_ERR(bio);
......@@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
rq->buffer = rq->data = NULL;
return 0;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
/**
* blk_rq_unmap_user - unmap a request with user data
......@@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
* Description:
* Unmap a rq previously mapped by blk_rq_map_user(). The caller must
* supply the original rq->bio from the blk_rq_map_user() return, since
* the io completion may have changed rq->bio.
* the I/O completion may have changed rq->bio.
*/
int blk_rq_unmap_user(struct bio *bio)
{
......@@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio)
EXPORT_SYMBOL(blk_rq_unmap_user);
/**
* blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
* blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
* @rq: request to fill
* @kbuf: the kernel buffer
......@@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
unsigned int len, gfp_t gfp_mask)
{
unsigned long kaddr;
unsigned int alignment;
int reading = rq_data_dir(rq) == READ;
int do_copy = 0;
struct bio *bio;
......@@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (!len || !kbuf)
return -EINVAL;
kaddr = (unsigned long)kbuf;
alignment = queue_dma_alignment(q) | q->dma_pad_mask;
do_copy = ((kaddr & alignment) || (len & alignment) ||
object_is_on_stack(kbuf));
do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
if (do_copy)
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
else
......
......@@ -11,7 +11,7 @@
void blk_recalc_rq_sectors(struct request *rq, int nsect)
{
if (blk_fs_request(rq)) {
if (blk_fs_request(rq) || blk_discard_rq(rq)) {
rq->hard_sector += nsect;
rq->hard_nr_sectors -= nsect;
......@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
void blk_recalc_rq_segments(struct request *rq)
{
int nr_phys_segs;
int nr_hw_segs;
unsigned int phys_size;
unsigned int hw_size;
struct bio_vec *bv, *bvprv = NULL;
int seg_size;
int hw_seg_size;
int cluster;
struct req_iterator iter;
int high, highprv = 1;
......@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq)
return;
cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
hw_seg_size = seg_size = 0;
phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
seg_size = 0;
phys_size = nr_phys_segs = 0;
rq_for_each_segment(bv, rq, iter) {
/*
* the trick here is making sure that a high page is never
......@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq)
*/
high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
if (high || highprv)
goto new_hw_segment;
goto new_segment;
if (cluster) {
if (seg_size + bv->bv_len > q->max_segment_size)
goto new_segment;
......@@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq)
goto new_segment;
if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
goto new_segment;
if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
goto new_hw_segment;
seg_size += bv->bv_len;
hw_seg_size += bv->bv_len;
bvprv = bv;
continue;
}
new_segment:
if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
!BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
hw_seg_size += bv->bv_len;
else {
new_hw_segment:
if (nr_hw_segs == 1 &&
hw_seg_size > rq->bio->bi_hw_front_size)
rq->bio->bi_hw_front_size = hw_seg_size;
hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
nr_hw_segs++;
}
nr_phys_segs++;
bvprv = bv;
seg_size = bv->bv_len;
highprv = high;
}
if (nr_hw_segs == 1 &&
hw_seg_size > rq->bio->bi_hw_front_size)
rq->bio->bi_hw_front_size = hw_seg_size;
if (hw_seg_size > rq->biotail->bi_hw_back_size)
rq->biotail->bi_hw_back_size = hw_seg_size;
rq->nr_phys_segments = nr_phys_segs;
rq->nr_hw_segments = nr_hw_segs;
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
......@@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
blk_recalc_rq_segments(&rq);
bio->bi_next = nxt;
bio->bi_phys_segments = rq.nr_phys_segments;
bio->bi_hw_segments = rq.nr_hw_segments;
bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
......@@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
return 0;
if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
return 0;
if (bio->bi_size + nxt->bi_size > q->max_segment_size)
return 0;
if (!bio_has_data(bio))
return 1;
if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
return 0;
/*
* bio and nxt are contigous in memory, check if the queue allows
* bio and nxt are contiguous in memory; check if the queue allows
* these two to be merged into one
*/
if (BIO_SEG_BOUNDARY(q, bio, nxt))
......@@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
return 0;
}
static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
if (!bio_flagged(bio, BIO_SEG_VALID))
blk_recount_segments(q, bio);
if (!bio_flagged(nxt, BIO_SEG_VALID))
blk_recount_segments(q, nxt);
if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
return 0;
if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
return 0;
return 1;
}
/*
* map a request to scatterlist, return number of sg entries setup. Caller
* must make sure sg can hold rq->nr_phys_segments entries
......@@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q,
struct request *req,
struct bio *bio)
{
int nr_hw_segs = bio_hw_segments(q, bio);
int nr_phys_segs = bio_phys_segments(q, bio);
if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
|| req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
......@@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q,
* This will form the start of a new hw segment. Bump both
* counters.
*/
req->nr_hw_segments += nr_hw_segs;
req->nr_phys_segments += nr_phys_segs;
return 1;
}
......@@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
int len;
if (unlikely(blk_pc_request(req)))
max_sectors = q->max_hw_sectors;
......@@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
blk_recount_segments(q, req->biotail);
if (!bio_flagged(bio, BIO_SEG_VALID))
blk_recount_segments(q, bio);
len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
&& !BIOVEC_VIRT_OVERSIZE(len)) {
int mergeable = ll_new_mergeable(q, req, bio);
if (mergeable) {
if (req->nr_hw_segments == 1)
req->bio->bi_hw_front_size = len;
if (bio->bi_hw_segments == 1)
bio->bi_hw_back_size = len;
}
return mergeable;
}
return ll_new_hw_segment(q, req, bio);
}
......@@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
int len;
if (unlikely(blk_pc_request(req)))
max_sectors = q->max_hw_sectors;
......@@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
q->last_merge = NULL;
return 0;
}
len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
if (!bio_flagged(bio, BIO_SEG_VALID))
blk_recount_segments(q, bio);
if (!bio_flagged(req->bio, BIO_SEG_VALID))
blk_recount_segments(q, req->bio);
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
!BIOVEC_VIRT_OVERSIZE(len)) {
int mergeable = ll_new_mergeable(q, req, bio);
if (mergeable) {
if (bio->bi_hw_segments == 1)
bio->bi_hw_front_size = len;
if (req->nr_hw_segments == 1)
req->biotail->bi_hw_back_size = len;
}
return mergeable;
}
return ll_new_hw_segment(q, req, bio);
}
......@@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
int total_phys_segments;
int total_hw_segments;
/*
* First check if the either of the requests are re-queued
......@@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
if (total_phys_segments > q->max_phys_segments)
return 0;
total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
int len = req->biotail->bi_hw_back_size +
next->bio->bi_hw_front_size;
/*
* propagate the combined length to the end of the requests
*/
if (req->nr_hw_segments == 1)
req->bio->bi_hw_front_size = len;
if (next->nr_hw_segments == 1)
next->biotail->bi_hw_back_size = len;
total_hw_segments--;
}
if (total_hw_segments > q->max_hw_segments)
if (total_phys_segments > q->max_hw_segments)
return 0;
/* Merge is OK... */
req->nr_phys_segments = total_phys_segments;
req->nr_hw_segments = total_hw_segments;
return 1;
}
......@@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req,
elv_merge_requests(q, req, next);
if (req->rq_disk) {
struct hd_struct *part
= get_part(req->rq_disk, req->sector);
disk_round_stats(req->rq_disk);
req->rq_disk->in_flight--;
if (part) {
part_round_stats(part);
part->in_flight--;
}
struct hd_struct *part;
int cpu;
cpu = part_stat_lock();
part = disk_map_sector_rcu(req->rq_disk, req->sector);
part_round_stats(cpu, part);
part_dec_in_flight(part);
part_stat_unlock();
}
req->ioprio = ioprio_best(req->ioprio, next->ioprio);
if (blk_rq_cpu_valid(next))
req->cpu = next->cpu;
__blk_put_request(q, next);
return 1;
......
......@@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
}
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
* blk_queue_set_discard - set a discard_sectors function for queue
* @q: queue
* @dfn: prepare_discard function
*
* It's possible for a queue to register a discard callback which is used
* to transform a discard request into the appropriate type for the
* hardware. If none is registered, then discard requests are failed
* with %EOPNOTSUPP.
*
*/
void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
{
q->prepare_discard_fn = dfn;
}
EXPORT_SYMBOL(blk_queue_set_discard);
/**
* blk_queue_merge_bvec - set a merge_bvec function for queue
* @q: queue
......@@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
}
EXPORT_SYMBOL(blk_queue_softirq_done);
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
q->rq_timed_out_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
{
q->lld_busy_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
/**
* blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected
......@@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
* Different hardware can have different requirements as to what pages
* it can do I/O directly to. A low level driver can call
* blk_queue_bounce_limit to have lower memory pages allocated as bounce
* buffers for doing I/O to pages residing above @page.
* buffers for doing I/O to pages residing above @dma_addr.
**/
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
{
......@@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments);
* Description:
* Enables a low level driver to set an upper limit on the number of
* hw data segments in a request. This would be the largest number of
* address/length pairs the host adapter can actually give as once
* address/length pairs the host adapter can actually give at once
* to the device.
**/
void blk_queue_max_hw_segments(struct request_queue *q,
......@@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
* @mask: alignment mask
*
* description:
* set required memory and length aligment for direct dma transactions.
* set required memory and length alignment for direct dma transactions.
* this is used when buiding direct io requests for the queue.
*
**/
......@@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
* @mask: alignment mask
*
* description:
* update required memory and length aligment for direct dma transactions.
* update required memory and length alignment for direct dma transactions.
* If the requested alignment is larger than the current alignment, then
* the current queue alignment is updated to the new value, otherwise it
* is left alone. The design of this is to allow multiple objects
......
/*
* Functions related to softirq rq completions
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include "blk.h"
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
/*
* Softirq action handler - move entries to local list and loop over them
* while passing them to the queue registered handler.
*/
static void blk_done_softirq(struct softirq_action *h)
{
struct list_head *cpu_list, local_list;
local_irq_disable();
cpu_list = &__get_cpu_var(blk_cpu_done);
list_replace_init(cpu_list, &local_list);
local_irq_enable();
while (!list_empty(&local_list)) {
struct request *rq;
rq = list_entry(local_list.next, struct request, csd.list);
list_del_init(&rq->csd.list);
rq->q->softirq_done_fn(rq);
}
}
#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
static void trigger_softirq(void *data)
{
struct request *rq = data;
unsigned long flags;
struct list_head *list;
local_irq_save(flags);
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&rq->csd.list, list);
if (list->next == &rq->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_restore(flags);
}
/*
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
*/
static int raise_blk_irq(int cpu, struct request *rq)
{
if (cpu_online(cpu)) {
struct call_single_data *data = &rq->csd;
data->func = trigger_softirq;
data->info = rq;
data->flags = 0;
__smp_call_function_single(cpu, data);
return 0;
}
return 1;
}
#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
static int raise_blk_irq(int cpu, struct request *rq)
{
return 1;
}
#endif
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
int cpu = (unsigned long) hcpu;
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_done, cpu),
&__get_cpu_var(blk_cpu_done));
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_enable();
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};
void __blk_complete_request(struct request *req)
{
struct request_queue *q = req->q;
unsigned long flags;
int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
ccpu = req->cpu;
else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&req->csd.list, list);
/*
* if the list only contains our just added request,
* signal a raise of the softirq. If there are already
* entries there, someone already raised the irq but it
* hasn't run yet.
*/
if (list->next == &req->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
} else if (raise_blk_irq(ccpu, req))
goto do_local;
local_irq_restore(flags);
}
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
*
* Description:
* Ends all I/O on a request. It does not handle partial completions,
* unless the driver actually implements this in its completion callback
* through requeueing. The actual completion happens out-of-order,
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/
void blk_complete_request(struct request *req)
{
if (unlikely(blk_should_fake_timeout(req->q)))
return;
if (!blk_mark_rq_complete(req))
__blk_complete_request(req);
}
EXPORT_SYMBOL(blk_complete_request);
__init int blk_softirq_init(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
register_hotcpu_notifier(&blk_cpu_notifier);
return 0;
}
subsys_initcall(blk_softirq_init);
......@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
return ret;
}
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
return queue_var_show(set != 0, page);
}
static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
unsigned long val;
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
else
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
spin_unlock_irq(q->queue_lock);
#endif
return ret;
}
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
......@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
......@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
NULL,
};
......@@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk)
if (!q->request_fn)
return 0;
ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
"%s", "queue");
if (ret < 0)
return ret;
......@@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk)
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
kobject_put(&disk->dev.kobj);
kobject_put(&disk_to_dev(disk)->kobj);
}
}
......@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
* __blk_free_tags - release a given set of tag maintenance info
* @bqt: the tag map to free
*
* Tries to free the specified @bqt@. Returns true if it was
* Tries to free the specified @bqt. Returns true if it was
* actually freed and false if there are still references using it
*/
static int __blk_free_tags(struct blk_queue_tag *bqt)
......@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q)
* blk_free_tags - release a given set of tag maintenance info
* @bqt: the tag map to free
*
* For externally managed @bqt@ frees the map. Callers of this
* For externally managed @bqt frees the map. Callers of this
* function must guarantee to have released all the queues that
* might have been using this tag map.
*/
......@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
* @q: the request queue for the device
*
* Notes:
* This is used to disabled tagged queuing to a device, yet leave
* This is used to disable tagged queuing to a device, yet leave
* queue in function.
**/
void blk_queue_free_tags(struct request_queue *q)
......@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
* @rq: the request that has completed
*
* Description:
* Typically called when end_that_request_first() returns 0, meaning
* Typically called when end_that_request_first() returns %0, meaning
* all transfers have been done for a request. It's important to call
* this function before end_that_request_last(), as that will put the
* request back on the free list thus corrupting the internal tag list.
......@@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
unsigned max_depth, offset;
int tag;
if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
......@@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
/*
* Protect against shared tag maps, as we may not have exclusive
* access to the tag map.
*
* We reserve a few tags just for sync IO, since we don't want
* to starve sync IO on behalf of flooding async IO.
*/
max_depth = bqt->max_depth;
if (rq_is_sync(rq))
offset = 0;
else
offset = max_depth >> 2;
do {
tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
if (tag >= bqt->max_depth)
tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
if (tag >= max_depth)
return 1;
} while (test_and_set_bit_lock(tag, bqt->tag_map));
......
/*
* Functions related to generic timeout handling of requests.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fault-inject.h>
#include "blk.h"
#ifdef CONFIG_FAIL_IO_TIMEOUT
static DECLARE_FAULT_ATTR(fail_io_timeout);
static int __init setup_fail_io_timeout(char *str)
{
return setup_fault_attr(&fail_io_timeout, str);
}
__setup("fail_io_timeout=", setup_fail_io_timeout);
int blk_should_fake_timeout(struct request_queue *q)
{
if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
return 0;
return should_fail(&fail_io_timeout, 1);
}
static int __init fail_io_timeout_debugfs(void)
{
return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
}
late_initcall(fail_io_timeout_debugfs);
ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
return sprintf(buf, "%d\n", set != 0);
}
ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct gendisk *disk = dev_to_disk(dev);
int val;
if (count) {
struct request_queue *q = disk->queue;
char *p = (char *) buf;
val = simple_strtoul(p, &p, 10);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
else
queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
spin_unlock_irq(q->queue_lock);
}
return count;
}
#endif /* CONFIG_FAIL_IO_TIMEOUT */
/*
* blk_delete_timer - Delete/cancel timer for a given function.
* @req: request that we are canceling timer for
*
*/
void blk_delete_timer(struct request *req)
{
struct request_queue *q = req->q;
/*
* Nothing to detach
*/
if (!q->rq_timed_out_fn || !req->deadline)
return;
list_del_init(&req->timeout_list);
if (list_empty(&q->timeout_list))
del_timer(&q->timeout);
}
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret;
ret = q->rq_timed_out_fn(req);
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
blk_clear_rq_complete(req);
blk_add_timer(req);
break;
case BLK_EH_NOT_HANDLED:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
* and we can move more of the generic scsi eh code to
* the blk layer.
*/
break;
default:
printk(KERN_ERR "block: bad eh return: %d\n", ret);
break;
}
}
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
unsigned long flags, uninitialized_var(next), next_set = 0;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
if (time_after_eq(jiffies, rq->deadline)) {
list_del_init(&rq->timeout_list);
/*
* Check if we raced with end io completion
*/
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
}
if (!next_set) {
next = rq->deadline;
next_set = 1;
} else if (time_after(next, rq->deadline))
next = rq->deadline;
}
if (next_set && !list_empty(&q->timeout_list))
mod_timer(&q->timeout, round_jiffies(next));
spin_unlock_irqrestore(q->queue_lock, flags);
}
/**
* blk_abort_request -- Request request recovery for the specified command
* @req: pointer to the request of interest
*
* This function requests that the block layer start recovery for the
* request by deleting the timer and calling the q's timeout function.
* LLDDs who implement their own error recovery MAY ignore the timeout
* event if they generated blk_abort_req. Must hold queue lock.
*/
void blk_abort_request(struct request *req)
{
if (blk_mark_rq_complete(req))
return;
blk_delete_timer(req);
blk_rq_timed_out(req);
}
EXPORT_SYMBOL_GPL(blk_abort_request);
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
*
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
if (!q->rq_timed_out_fn)
return;
BUG_ON(!list_empty(&req->timeout_list));
BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
if (req->timeout)
req->deadline = jiffies + req->timeout;
else {
req->deadline = jiffies + q->rq_timeout;
/*
* Some LLDs, like scsi, peek at the timeout to prevent
* a command from being retried forever.
*/
req->timeout = q->rq_timeout;
}
list_add_tail(&req->timeout_list, &q->timeout_list);
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round to next nearest
* second.
*/
expiry = round_jiffies(req->deadline);
/*
* We use ->deadline == 0 to detect whether a timer was added or
* not, so just increase to next jiffy for that specific case
*/
if (unlikely(!req->deadline))
req->deadline = 1;
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires))
mod_timer(&q->timeout, expiry);
}
/**
* blk_abort_queue -- Abort all request on given queue
* @queue: pointer to queue
*
*/
void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
blk_abort_request(rq);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_abort_queue);
......@@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q);
void blk_unplug_work(struct work_struct *work);
void blk_unplug_timeout(unsigned long data);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
/*
* Internal atomic flags for request handling
*/
enum rq_atomic_flags {
REQ_ATOM_COMPLETE = 0,
};
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
* sure that only one of them suceeds
*/
static inline int blk_mark_rq_complete(struct request *rq)
{
return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
static inline void blk_clear_rq_complete(struct request *rq)
{
clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
#else
static inline int blk_should_fake_timeout(struct request_queue *q)
{
return 0;
}
#endif
struct io_context *current_io_context(gfp_t gfp_flags, int node);
......@@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
#endif /* BLK_DEV_INTEGRITY */
static inline int blk_cpu_to_group(int cpu)
{
#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
return first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
return cpu;
#endif
}
#endif
......@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
*/
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
/*
* Bio action bits of interest
*/
static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
/*
* More could be added as needed, taking care to increment the decrementer
* to get correct indexing
*/
#define trace_barrier_bit(rw) \
(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
#define trace_sync_bit(rw) \
(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
#define trace_ahead_bit(rw) \
(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
#define trace_meta_bit(rw) \
(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
(ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
/*
* The worker for the various blk_add_trace*() types. Fills out a
......@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
what |= ddir_act[rw & WRITE];
what |= bio_act[trace_barrier_bit(rw)];
what |= bio_act[trace_sync_bit(rw)];
what |= bio_act[trace_ahead_bit(rw)];
what |= bio_act[trace_meta_bit(rw)];
what |= MASK_TC_BIT(rw, BARRIER);
what |= MASK_TC_BIT(rw, SYNC);
what |= MASK_TC_BIT(rw, AHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
pid = tsk->pid;
if (unlikely(act_log_check(bt, what, sector, pid)))
......@@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
strcpy(buts->name, name);
strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
/*
* some device names have larger paths - convert the slashes
......
......@@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
next_rq->cmd_type = rq->cmd_type;
dxferp = (void*)(unsigned long)hdr->din_xferp;
ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len);
ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
hdr->din_xfer_len, GFP_KERNEL);
if (ret)
goto out;
}
......@@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
dxfer_len = 0;
if (dxfer_len) {
ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
GFP_KERNEL);
if (ret)
goto out;
}
......
......@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125;
#define CFQ_MIN_TT (2)
#define CFQ_SLICE_SCALE (5)
#define CFQ_HW_QUEUE_MIN (5)
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
......@@ -86,7 +87,14 @@ struct cfq_data {
int rq_in_driver;
int sync_flight;
/*
* queue-depth detection
*/
int rq_queued;
int hw_tag;
int hw_tag_samples;
int rq_in_driver_peak;
/*
* idle window management
......@@ -244,7 +252,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
if (cfqd->busy_queues) {
cfq_log(cfqd, "schedule dispatch");
kblockd_schedule_work(&cfqd->unplug_work);
kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
}
}
......@@ -654,15 +662,6 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
cfqd->rq_in_driver);
/*
* If the depth is larger 1, it really could be queueing. But lets
* make the mark a little higher - idling could still be good for
* low queueing, and a low queueing number could also just indicate
* a SCSI mid layer like behaviour where limit+1 is often seen.
*/
if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
cfqd->hw_tag = 1;
cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
}
......@@ -686,6 +685,7 @@ static void cfq_remove_request(struct request *rq)
list_del_init(&rq->queuelist);
cfq_del_rq_rb(rq);
cfqq->cfqd->rq_queued--;
if (rq_is_meta(rq)) {
WARN_ON(!cfqq->meta_pending);
cfqq->meta_pending--;
......@@ -878,6 +878,14 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
struct cfq_io_context *cic;
unsigned long sl;
/*
* SSD device without seek penalty, disable idling. But only do so
* for devices that support queuing, otherwise we still have a problem
* with sync vs async workloads.
*/
if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
return;
WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
WARN_ON(cfq_cfqq_slice_new(cfqq));
......@@ -1833,6 +1841,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
{
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
if (rq_is_meta(rq))
cfqq->meta_pending++;
......@@ -1880,6 +1889,31 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
cfq_rq_enqueued(cfqd, cfqq, rq);
}
/*
* Update hw_tag based on peak queue depth over 50 samples under
* sufficient load.
*/
static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
return;
if (cfqd->hw_tag_samples++ < 50)
return;
if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
cfqd->hw_tag = 1;
else
cfqd->hw_tag = 0;
cfqd->hw_tag_samples = 0;
cfqd->rq_in_driver_peak = 0;
}
static void cfq_completed_request(struct request_queue *q, struct request *rq)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
......@@ -1890,6 +1924,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
now = jiffies;
cfq_log_cfqq(cfqd, cfqq, "complete");
cfq_update_hw_tag(cfqd);
WARN_ON(!cfqd->rq_in_driver);
WARN_ON(!cfqq->dispatched);
cfqd->rq_in_driver--;
......@@ -2200,6 +2236,7 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_slice[1] = cfq_slice_sync;
cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
cfqd->cfq_slice_idle = cfq_slice_idle;
cfqd->hw_tag = 1;
return cfqd;
}
......
......@@ -211,14 +211,10 @@ int blk_register_filter(struct gendisk *disk)
{
int ret;
struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
struct kobject *parent = kobject_get(disk->holder_dir->parent);
if (!parent)
return -ENODEV;
ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
&disk_to_dev(disk)->kobj,
"%s", "cmd_filter");
if (ret < 0)
return ret;
......@@ -231,7 +227,6 @@ void blk_unregister_filter(struct gendisk *disk)
struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
kobject_put(&filter->kobj);
kobject_put(disk->holder_dir->parent);
}
EXPORT_SYMBOL(blk_unregister_filter);
#endif
......@@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
case BLKFLSBUF:
case BLKROSET:
case BLKDISCARD:
/*
* the ones below are implemented in blkdev_locked_ioctl,
* but we call blkdev_ioctl, which gets the lock for us
......
......@@ -33,7 +33,7 @@ struct deadline_data {
*/
struct rb_root sort_list[2];
struct list_head fifo_list[2];
/*
* next in sort order. read, write or both are NULL
*/
......@@ -53,7 +53,11 @@ struct deadline_data {
static void deadline_move_request(struct deadline_data *, struct request *);
#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
return &dd->sort_list[rq_data_dir(rq)];
}
/*
* get the request after `rq' in sector-sorted order
......@@ -72,15 +76,11 @@ deadline_latter_request(struct request *rq)
static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = RQ_RB_ROOT(dd, rq);
struct rb_root *root = deadline_rb_root(dd, rq);
struct request *__alias;
retry:
__alias = elv_rb_add(root, rq);
if (unlikely(__alias)) {
while (unlikely(__alias = elv_rb_add(root, rq)))
deadline_move_request(dd, __alias);
goto retry;
}
}
static inline void
......@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
if (dd->next_rq[data_dir] == rq)
dd->next_rq[data_dir] = deadline_latter_request(rq);
elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
elv_rb_del(deadline_rb_root(dd, rq), rq);
}
/*
......@@ -106,7 +106,7 @@ deadline_add_request(struct request_queue *q, struct request *rq)
deadline_add_rq_rb(dd, rq);
/*
* set expire time (only used for reads) and add to fifo list
* set expire time and add to fifo list
*/
rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
......@@ -162,7 +162,7 @@ static void deadline_merged_request(struct request_queue *q,
* if the merge was a front merge, we need to reposition request
*/
if (type == ELEVATOR_FRONT_MERGE) {
elv_rb_del(RQ_RB_ROOT(dd, req), req);
elv_rb_del(deadline_rb_root(dd, req), req);
deadline_add_rq_rb(dd, req);
}
}
......@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
dd->next_rq[WRITE] = NULL;
dd->next_rq[data_dir] = deadline_latter_request(rq);
dd->last_sector = rq->sector + rq->nr_sectors;
dd->last_sector = rq_end_sector(rq);
/*
* take it off the sort and fifo list, move
......@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
}
/*
* deadline_check_fifo returns 0 if there are no expired reads on the fifo,
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
*/
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
......@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
else
rq = dd->next_rq[READ];
if (rq) {
/* we have a "next request" */
if (dd->last_sector != rq->sector)
/* end the batch on a non sequential request */
dd->batching += dd->fifo_batch;
if (dd->batching < dd->fifo_batch)
/* we are still entitled to batch */
goto dispatch_request;
}
if (rq && dd->batching < dd->fifo_batch)
/* we have a next request are still entitled to batch */
goto dispatch_request;
/*
* at this point we are not running a batch. select the appropriate
......
......@@ -34,8 +34,9 @@
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <asm/uaccess.h>
#include "blk.h"
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
......@@ -74,6 +75,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
if (!rq_mergeable(rq))
return 0;
/*
* Don't merge file system requests and discard requests
*/
if (bio_discard(bio) != bio_discard(rq->bio))
return 0;
/*
* different data direction or already started, don't merge
*/
......@@ -438,6 +445,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
list_for_each_prev(entry, &q->queue_head) {
struct request *pos = list_entry_rq(entry);
if (blk_discard_rq(rq) != blk_discard_rq(pos))
break;
if (rq_data_dir(rq) != rq_data_dir(pos))
break;
if (pos->cmd_flags & stop_flags)
......@@ -607,7 +616,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
break;
case ELEVATOR_INSERT_SORT:
BUG_ON(!blk_fs_request(rq));
BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {
......@@ -692,7 +701,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
* this request is scheduling boundary, update
* end_sector
*/
if (blk_fs_request(rq)) {
if (blk_fs_request(rq) || blk_discard_rq(rq)) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
......@@ -745,7 +754,7 @@ struct request *elv_next_request(struct request_queue *q)
* not ever see it.
*/
if (blk_empty_barrier(rq)) {
end_queued_request(rq, 1);
__blk_end_request(rq, 0, blk_rq_bytes(rq));
continue;
}
if (!(rq->cmd_flags & REQ_STARTED)) {
......@@ -764,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q)
*/
rq->cmd_flags |= REQ_STARTED;
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
/*
* We are now handing the request to the hardware,
* add the timeout handler
*/
blk_add_timer(rq);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
......@@ -782,7 +797,6 @@ struct request *elv_next_request(struct request_queue *q)
* device can handle
*/
rq->nr_phys_segments++;
rq->nr_hw_segments++;
}
if (!q->prep_rq_fn)
......@@ -805,14 +819,13 @@ struct request *elv_next_request(struct request_queue *q)
* so that we don't add it again
*/
--rq->nr_phys_segments;
--rq->nr_hw_segments;
}
rq = NULL;
break;
} else if (ret == BLKPREP_KILL) {
rq->cmd_flags |= REQ_QUIET;
end_queued_request(rq, 0);
__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
break;
......@@ -901,6 +914,19 @@ int elv_may_queue(struct request_queue *q, int rw)
return ELV_MQUEUE_MAY;
}
void elv_abort_queue(struct request_queue *q)
{
struct request *rq;
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
rq->cmd_flags |= REQ_QUIET;
blk_add_trace_rq(q, rq, BLK_TA_ABORT);
__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
}
}
EXPORT_SYMBOL(elv_abort_queue);
void elv_completed_request(struct request_queue *q, struct request *rq)
{
elevator_t *e = q->elevator;
......
This diff is collapsed.
......@@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
struct block_device *bdevp;
struct gendisk *disk;
struct hd_struct *part;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
long long start, length;
int part;
int i;
int partno;
int err;
if (!capable(CAP_SYS_ADMIN))
......@@ -28,8 +29,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
disk = bdev->bd_disk;
if (bdev != bdev->bd_contains)
return -EINVAL;
part = p.pno;
if (part <= 0 || part >= disk->minors)
partno = p.pno;
if (partno <= 0)
return -EINVAL;
switch (a.op) {
case BLKPG_ADD_PARTITION:
......@@ -43,36 +44,37 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|| pstart < 0 || plength < 0)
return -EINVAL;
}
/* partition number in use? */
mutex_lock(&bdev->bd_mutex);
if (disk->part[part - 1]) {
mutex_unlock(&bdev->bd_mutex);
return -EBUSY;
}
/* overlap? */
for (i = 0; i < disk->minors - 1; i++) {
struct hd_struct *s = disk->part[i];
if (!s)
continue;
if (!(start+length <= s->start_sect ||
start >= s->start_sect + s->nr_sects)) {
/* overlap? */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter))) {
if (!(start + length <= part->start_sect ||
start >= part->start_sect + part->nr_sects)) {
disk_part_iter_exit(&piter);
mutex_unlock(&bdev->bd_mutex);
return -EBUSY;
}
}
disk_part_iter_exit(&piter);
/* all seems OK */
err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
err = add_partition(disk, partno, start, length,
ADDPART_FLAG_NONE);
mutex_unlock(&bdev->bd_mutex);
return err;
case BLKPG_DEL_PARTITION:
if (!disk->part[part-1])
return -ENXIO;
if (disk->part[part - 1]->nr_sects == 0)
part = disk_get_part(disk, partno);
if (!part)
return -ENXIO;
bdevp = bdget_disk(disk, part);
bdevp = bdget(part_devt(part));
disk_put_part(part);
if (!bdevp)
return -ENOMEM;
mutex_lock(&bdevp->bd_mutex);
if (bdevp->bd_openers) {
mutex_unlock(&bdevp->bd_mutex);
......@@ -84,7 +86,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
invalidate_bdev(bdevp);
mutex_lock_nested(&bdev->bd_mutex, 1);
delete_partition(disk, part);
delete_partition(disk, partno);
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdevp->bd_mutex);
bdput(bdevp);
......@@ -100,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev)
struct gendisk *disk = bdev->bd_disk;
int res;
if (disk->minors == 1 || bdev != bdev->bd_contains)
if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
......@@ -111,6 +113,69 @@ static int blkdev_reread_part(struct block_device *bdev)
return res;
}
static void blk_ioc_discard_endio(struct bio *bio, int err)
{
if (err) {
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
complete(bio->bi_private);
}
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
uint64_t len)
{
struct request_queue *q = bdev_get_queue(bdev);
int ret = 0;
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
start >>= 9;
len >>= 9;
if (start + len > (bdev->bd_inode->i_size >> 9))
return -EINVAL;
if (!q->prepare_discard_fn)
return -EOPNOTSUPP;
while (len && !ret) {
DECLARE_COMPLETION_ONSTACK(wait);
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blk_ioc_discard_endio;
bio->bi_bdev = bdev;
bio->bi_private = &wait;
bio->bi_sector = start;
if (len > q->max_hw_sectors) {
bio->bi_size = q->max_hw_sectors << 9;
len -= q->max_hw_sectors;
start += q->max_hw_sectors;
} else {
bio->bi_size = len << 9;
len = 0;
}
submit_bio(DISCARD_NOBARRIER, bio);
wait_for_completion(&wait);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
ret = -EIO;
bio_put(bio);
}
return ret;
}
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
......@@ -258,6 +323,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
set_device_ro(bdev, n);
unlock_kernel();
return 0;
case BLKDISCARD: {
uint64_t range[2];
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
return blk_ioctl_discard(bdev, range[0], range[1]);
}
case HDIO_GETGEO: {
struct hd_geometry geo;
......
......@@ -185,6 +185,7 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
__set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
__set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
}
EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
......@@ -313,11 +314,12 @@ static int sg_io(struct file *file, struct request_queue *q,
goto out;
}
ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count,
hdr->dxfer_len);
ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
hdr->dxfer_len, GFP_KERNEL);
kfree(iov);
} else if (hdr->dxfer_len)
ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
GFP_KERNEL);
if (ret)
goto out;
......
......@@ -33,6 +33,7 @@
*/
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
......@@ -459,29 +460,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
* RETURNS:
* EH_HANDLED or EH_NOT_HANDLED
*/
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
struct Scsi_Host *host = cmd->device->host;
struct ata_port *ap = ata_shost_to_port(host);
unsigned long flags;
struct ata_queued_cmd *qc;
enum scsi_eh_timer_return ret;
enum blk_eh_timer_return ret;
DPRINTK("ENTER\n");
if (ap->ops->error_handler) {
ret = EH_NOT_HANDLED;
ret = BLK_EH_NOT_HANDLED;
goto out;
}
ret = EH_HANDLED;
ret = BLK_EH_HANDLED;
spin_lock_irqsave(ap->lock, flags);
qc = ata_qc_from_tag(ap, ap->link.active_tag);
if (qc) {
WARN_ON(qc->scsicmd != cmd);
qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
qc->err_mask |= AC_ERR_TIMEOUT;
ret = EH_NOT_HANDLED;
ret = BLK_EH_NOT_HANDLED;
}
spin_unlock_irqrestore(ap->lock, flags);
......@@ -833,7 +834,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
* Note that ATA_QCFLAG_FAILED is unconditionally set after
* this function completes.
*/
scsi_req_abort_cmd(qc->scsicmd);
blk_abort_request(qc->scsicmd->request);
}
/**
......
......@@ -1085,6 +1085,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
} else {
if (ata_id_is_ssd(dev->id))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT,
sdev->request_queue);
/* ATA devices must be sector aligned */
blk_queue_update_dma_alignment(sdev->request_queue,
ATA_SECT_SIZE - 1);
......
......@@ -155,7 +155,7 @@ extern int ata_bus_probe(struct ata_port *ap);
/* libata-eh.c */
extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
extern void ata_scsi_error(struct Scsi_Host *host);
extern void ata_port_wait_eh(struct ata_port *ap);
extern void ata_eh_fastdrain_timerfn(unsigned long arg);
......
......@@ -54,7 +54,7 @@ struct driver_private {
*/
struct class_private {
struct kset class_subsys;
struct list_head class_devices;
struct klist class_devices;
struct list_head class_interfaces;
struct kset class_dirs;
struct mutex class_mutex;
......
......@@ -135,6 +135,20 @@ static void remove_class_attrs(struct class *cls)
}
}
static void klist_class_dev_get(struct klist_node *n)
{
struct device *dev = container_of(n, struct device, knode_class);
get_device(dev);
}
static void klist_class_dev_put(struct klist_node *n)
{
struct device *dev = container_of(n, struct device, knode_class);
put_device(dev);
}
int __class_register(struct class *cls, struct lock_class_key *key)
{
struct class_private *cp;
......@@ -145,7 +159,7 @@ int __class_register(struct class *cls, struct lock_class_key *key)
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp)
return -ENOMEM;
INIT_LIST_HEAD(&cp->class_devices);
klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
INIT_LIST_HEAD(&cp->class_interfaces);
kset_init(&cp->class_dirs);
__mutex_init(&cp->class_mutex, "struct class mutex", key);
......@@ -268,6 +282,71 @@ char *make_class_name(const char *name, struct kobject *kobj)
}
#endif
/**
* class_dev_iter_init - initialize class device iterator
* @iter: class iterator to initialize
* @class: the class we wanna iterate over
* @start: the device to start iterating from, if any
* @type: device_type of the devices to iterate over, NULL for all
*
* Initialize class iterator @iter such that it iterates over devices
* of @class. If @start is set, the list iteration will start there,
* otherwise if it is NULL, the iteration starts at the beginning of
* the list.
*/
void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
struct device *start, const struct device_type *type)
{
struct klist_node *start_knode = NULL;
if (start)
start_knode = &start->knode_class;
klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
iter->type = type;
}
EXPORT_SYMBOL_GPL(class_dev_iter_init);
/**
* class_dev_iter_next - iterate to the next device
* @iter: class iterator to proceed
*
* Proceed @iter to the next device and return it. Returns NULL if
* iteration is complete.
*
* The returned device is referenced and won't be released till
* iterator is proceed to the next device or exited. The caller is
* free to do whatever it wants to do with the device including
* calling back into class code.
*/
struct device *class_dev_iter_next(struct class_dev_iter *iter)
{
struct klist_node *knode;
struct device *dev;
while (1) {
knode = klist_next(&iter->ki);
if (!knode)
return NULL;
dev = container_of(knode, struct device, knode_class);
if (!iter->type || iter->type == dev->type)
return dev;
}
}
EXPORT_SYMBOL_GPL(class_dev_iter_next);
/**
* class_dev_iter_exit - finish iteration
* @iter: class iterator to finish
*
* Finish an iteration. Always call this function after iteration is
* complete whether the iteration ran till the end or not.
*/
void class_dev_iter_exit(struct class_dev_iter *iter)
{
klist_iter_exit(&iter->ki);
}
EXPORT_SYMBOL_GPL(class_dev_iter_exit);
/**
* class_for_each_device - device iterator
* @class: the class we're iterating
......@@ -283,13 +362,13 @@ char *make_class_name(const char *name, struct kobject *kobj)
* We check the return of @fn each time. If it returns anything
* other than 0, we break out and return that value.
*
* Note, we hold class->class_mutex in this function, so it can not be
* re-acquired in @fn, otherwise it will self-deadlocking. For
* example, calls to add or remove class members would be verboten.
* @fn is allowed to do anything including calling back into class
* code. There's no locking restriction.
*/
int class_for_each_device(struct class *class, struct device *start,
void *data, int (*fn)(struct device *, void *))
{
struct class_dev_iter iter;
struct device *dev;
int error = 0;
......@@ -301,20 +380,13 @@ int class_for_each_device(struct class *class, struct device *start,
return -EINVAL;
}
mutex_lock(&class->p->class_mutex);
list_for_each_entry(dev, &class->p->class_devices, node) {
if (start) {
if (start == dev)
start = NULL;
continue;
}
dev = get_device(dev);
class_dev_iter_init(&iter, class, start, NULL);
while ((dev = class_dev_iter_next(&iter))) {
error = fn(dev, data);
put_device(dev);
if (error)
break;
}
mutex_unlock(&class->p->class_mutex);
class_dev_iter_exit(&iter);
return error;
}
......@@ -337,16 +409,15 @@ EXPORT_SYMBOL_GPL(class_for_each_device);
*
* Note, you will need to drop the reference with put_device() after use.
*
* We hold class->class_mutex in this function, so it can not be
* re-acquired in @match, otherwise it will self-deadlocking. For
* example, calls to add or remove class members would be verboten.
* @fn is allowed to do anything including calling back into class
* code. There's no locking restriction.
*/
struct device *class_find_device(struct class *class, struct device *start,
void *data,
int (*match)(struct device *, void *))
{
struct class_dev_iter iter;
struct device *dev;
int found = 0;
if (!class)
return NULL;
......@@ -356,29 +427,23 @@ struct device *class_find_device(struct class *class, struct device *start,
return NULL;
}
mutex_lock(&class->p->class_mutex);
list_for_each_entry(dev, &class->p->class_devices, node) {
if (start) {
if (start == dev)
start = NULL;
continue;
}
dev = get_device(dev);
class_dev_iter_init(&iter, class, start, NULL);
while ((dev = class_dev_iter_next(&iter))) {
if (match(dev, data)) {
found = 1;
get_device(dev);
break;
} else
put_device(dev);
}
}
mutex_unlock(&class->p->class_mutex);
class_dev_iter_exit(&iter);
return found ? dev : NULL;
return dev;
}
EXPORT_SYMBOL_GPL(class_find_device);
int class_interface_register(struct class_interface *class_intf)
{
struct class *parent;
struct class_dev_iter iter;
struct device *dev;
if (!class_intf || !class_intf->class)
......@@ -391,8 +456,10 @@ int class_interface_register(struct class_interface *class_intf)
mutex_lock(&parent->p->class_mutex);
list_add_tail(&class_intf->node, &parent->p->class_interfaces);
if (class_intf->add_dev) {
list_for_each_entry(dev, &parent->p->class_devices, node)
class_dev_iter_init(&iter, parent, NULL, NULL);
while ((dev = class_dev_iter_next(&iter)))
class_intf->add_dev(dev, class_intf);
class_dev_iter_exit(&iter);
}
mutex_unlock(&parent->p->class_mutex);
......@@ -402,6 +469,7 @@ int class_interface_register(struct class_interface *class_intf)
void class_interface_unregister(struct class_interface *class_intf)
{
struct class *parent = class_intf->class;
struct class_dev_iter iter;
struct device *dev;
if (!parent)
......@@ -410,8 +478,10 @@ void class_interface_unregister(struct class_interface *class_intf)
mutex_lock(&parent->p->class_mutex);
list_del_init(&class_intf->node);
if (class_intf->remove_dev) {
list_for_each_entry(dev, &parent->p->class_devices, node)
class_dev_iter_init(&iter, parent, NULL, NULL);
while ((dev = class_dev_iter_next(&iter)))
class_intf->remove_dev(dev, class_intf);
class_dev_iter_exit(&iter);
}
mutex_unlock(&parent->p->class_mutex);
......
......@@ -536,7 +536,6 @@ void device_initialize(struct device *dev)
klist_init(&dev->klist_children, klist_children_get,
klist_children_put);
INIT_LIST_HEAD(&dev->dma_pools);
INIT_LIST_HEAD(&dev->node);
init_MUTEX(&dev->sem);
spin_lock_init(&dev->devres_lock);
INIT_LIST_HEAD(&dev->devres_head);
......@@ -916,7 +915,8 @@ int device_add(struct device *dev)
if (dev->class) {
mutex_lock(&dev->class->p->class_mutex);
/* tie the class to the device */
list_add_tail(&dev->node, &dev->class->p->class_devices);
klist_add_tail(&dev->knode_class,
&dev->class->p->class_devices);
/* notify any interfaces that the device is here */
list_for_each_entry(class_intf,
......@@ -1032,7 +1032,7 @@ void device_del(struct device *dev)
if (class_intf->remove_dev)
class_intf->remove_dev(dev, class_intf);
/* remove the device from the class list */
list_del_init(&dev->node);
klist_del(&dev->knode_class);
mutex_unlock(&dev->class->p->class_mutex);
}
device_remove_file(dev, &uevent_attr);
......
......@@ -109,12 +109,12 @@ static const struct attribute_group attr_group = {
static int
aoedisk_add_sysfs(struct aoedev *d)
{
return sysfs_create_group(&d->gd->dev.kobj, &attr_group);
return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
}
void
aoedisk_rm_sysfs(struct aoedev *d)
{
sysfs_remove_group(&d->gd->dev.kobj, &attr_group);
sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
}
static int
......@@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp)
gd->first_minor = d->sysminor * AOE_PARTITIONS;
gd->fops = &aoe_bdops;
gd->private_data = d;
gd->capacity = d->ssize;
set_capacity(gd, d->ssize);
snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
d->aoemajor, d->aoeminor);
......
......@@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work)
unsigned long flags;
u64 ssize;
ssize = d->gd->capacity;
ssize = get_capacity(d->gd);
bd = bdget_disk(d->gd, 0);
if (bd) {
......@@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
return;
if (d->gd != NULL) {
d->gd->capacity = ssize;
set_capacity(d->gd, ssize);
d->flags |= DEVFL_NEWSIZE;
} else
d->flags |= DEVFL_GDALLOC;
......@@ -756,12 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
unsigned long n_sect = bio->bi_size >> 9;
const int rw = bio_data_dir(bio);
struct hd_struct *part;
int cpu;
part = get_part(disk, sector);
all_stat_inc(disk, part, ios[rw], sector);
all_stat_add(disk, part, ticks[rw], duration, sector);
all_stat_add(disk, part, sectors[rw], n_sect, sector);
all_stat_add(disk, part, io_ticks, duration, sector);
cpu = part_stat_lock();
part = disk_map_sector_rcu(disk, sector);
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
part_stat_add(cpu, part, sectors[rw], n_sect);
part_stat_add(cpu, part, io_ticks, duration);
part_stat_unlock();
}
void
......
......@@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d)
}
if (d->gd)
d->gd->capacity = 0;
set_capacity(d->gd, 0);
d->flags &= ~DEVFL_UP;
}
......
......@@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
hba[i]->cmd_pool_bits =
kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG -
1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL);
kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
* sizeof(unsigned long), GFP_KERNEL);
hba[i]->cmd_pool = (CommandList_struct *)
pci_alloc_consistent(hba[i]->pdev,
hba[i]->nr_cmds * sizeof(CommandList_struct),
......@@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
/* command and error info recs zeroed out before
they are used */
memset(hba[i]->cmd_pool_bits, 0,
((hba[i]->nr_cmds + BITS_PER_LONG -
1) / BITS_PER_LONG) * sizeof(unsigned long));
DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
* sizeof(unsigned long));
hba[i]->num_luns = 0;
hba[i]->highest_lun = -1;
......
......@@ -365,7 +365,7 @@ struct scsi2map {
static int
cciss_scsi_add_entry(int ctlr, int hostno,
unsigned char *scsi3addr, int devtype,
struct cciss_scsi_dev_t *device,
struct scsi2map *added, int *nadded)
{
/* assumes hba[ctlr]->scsi_ctlr->lock is held */
......@@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno,
lun = 0;
/* Is this device a non-zero lun of a multi-lun device */
/* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
if (scsi3addr[4] != 0) {
if (device->scsi3addr[4] != 0) {
/* Search through our list and find the device which */
/* has the same 8 byte LUN address, excepting byte 4. */
/* Assign the same bus and target for this new LUN. */
/* Use the logical unit number from the firmware. */
memcpy(addr1, scsi3addr, 8);
memcpy(addr1, device->scsi3addr, 8);
addr1[4] = 0;
for (i = 0; i < n; i++) {
sd = &ccissscsi[ctlr].dev[i];
......@@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno,
if (memcmp(addr1, addr2, 8) == 0) {
bus = sd->bus;
target = sd->target;
lun = scsi3addr[4];
lun = device->scsi3addr[4];
break;
}
}
......@@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno,
added[*nadded].lun = sd->lun;
(*nadded)++;
memcpy(&sd->scsi3addr[0], scsi3addr, 8);
sd->devtype = devtype;
memcpy(sd->scsi3addr, device->scsi3addr, 8);
memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
memcpy(sd->revision, device->revision, sizeof(sd->revision));
memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
sd->devtype = device->devtype;
ccissscsi[ctlr].ndevices++;
/* initially, (before registering with scsi layer) we don't
......@@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr)
CPQ_TAPE_UNLOCK(ctlr, flags);
}
static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
struct cciss_scsi_dev_t *dev2)
{
return dev1->devtype == dev2->devtype &&
memcmp(dev1->scsi3addr, dev2->scsi3addr,
sizeof(dev1->scsi3addr)) == 0 &&
memcmp(dev1->device_id, dev2->device_id,
sizeof(dev1->device_id)) == 0 &&
memcmp(dev1->vendor, dev2->vendor,
sizeof(dev1->vendor)) == 0 &&
memcmp(dev1->model, dev2->model,
sizeof(dev1->model)) == 0 &&
memcmp(dev1->revision, dev2->revision,
sizeof(dev1->revision)) == 0;
}
static int
adjust_cciss_scsi_table(int ctlr, int hostno,
struct cciss_scsi_dev_t sd[], int nsds)
......@@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
for (j=0;j<nsds;j++) {
if (SCSI3ADDR_EQ(sd[j].scsi3addr,
csd->scsi3addr)) {
if (sd[j].devtype == csd->devtype)
if (device_is_the_same(&sd[j], csd))
found=2;
else
found=1;
......@@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
cciss_scsi_remove_entry(ctlr, hostno, i,
removed, &nremoved);
/* remove ^^^, hence i not incremented */
}
else if (found == 1) { /* device is different kind */
} else if (found == 1) { /* device is different in some way */
changes++;
printk("cciss%d: device c%db%dt%dl%d type changed "
"(device type now %s).\n",
ctlr, hostno, csd->bus, csd->target, csd->lun,
scsi_device_type(csd->devtype));
printk("cciss%d: device c%db%dt%dl%d has changed.\n",
ctlr, hostno, csd->bus, csd->target, csd->lun);
cciss_scsi_remove_entry(ctlr, hostno, i,
removed, &nremoved);
/* remove ^^^, hence i not incremented */
if (cciss_scsi_add_entry(ctlr, hostno,
&sd[j].scsi3addr[0], sd[j].devtype,
if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
added, &nadded) != 0)
/* we just removed one, so add can't fail. */
BUG();
csd->devtype = sd[j].devtype;
memcpy(csd->device_id, sd[j].device_id,
sizeof(csd->device_id));
memcpy(csd->vendor, sd[j].vendor,
sizeof(csd->vendor));
memcpy(csd->model, sd[j].model,
sizeof(csd->model));
memcpy(csd->revision, sd[j].revision,
sizeof(csd->revision));
} else /* device is same as it ever was, */
i++; /* so just move along. */
}
......@@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
csd = &ccissscsi[ctlr].dev[j];
if (SCSI3ADDR_EQ(sd[i].scsi3addr,
csd->scsi3addr)) {
if (sd[i].devtype == csd->devtype)
if (device_is_the_same(&sd[i], csd))
found=2; /* found device */
else
found=1; /* found a bug. */
......@@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
}
if (!found) {
changes++;
if (cciss_scsi_add_entry(ctlr, hostno,
&sd[i].scsi3addr[0], sd[i].devtype,
if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
added, &nadded) != 0)
break;
} else if (found == 1) {
/* should never happen... */
changes++;
printk("cciss%d: device unexpectedly changed type\n",
ctlr);
printk(KERN_WARNING "cciss%d: device "
"unexpectedly changed\n", ctlr);
/* but if it does happen, we just ignore that device */
}
}
......@@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp)
static int
cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
unsigned char *buf, unsigned char bufsize)
unsigned char page, unsigned char *buf,
unsigned char bufsize)
{
int rc;
CommandList_struct *cp;
......@@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
ei = cp->err_info;
cdb[0] = CISS_INQUIRY;
cdb[1] = 0;
cdb[2] = 0;
cdb[1] = (page != 0);
cdb[2] = page;
cdb[3] = 0;
cdb[4] = bufsize;
cdb[5] = 0;
......@@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
return rc;
}
/* Get the device id from inquiry page 0x83 */
static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
unsigned char *device_id, int buflen)
{
int rc;
unsigned char *buf;
if (buflen > 16)
buflen = 16;
buf = kzalloc(64, GFP_KERNEL);
if (!buf)
return -1;
rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
if (rc == 0)
memcpy(device_id, &buf[8], buflen);
kfree(buf);
return rc != 0;
}
static int
cciss_scsi_do_report_phys_luns(ctlr_info_t *c,
ReportLunData_struct *buf, int bufsize)
......@@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
ctlr_info_t *c;
__u32 num_luns=0;
unsigned char *ch;
/* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */
struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
struct cciss_scsi_dev_t *currentsd, *this_device;
int ncurrent=0;
int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
int i;
c = (ctlr_info_t *) hba[cntl_num];
ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
if (ld_buff == NULL) {
printk(KERN_ERR "cciss: out of memory\n");
return;
}
inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
if (inq_buff == NULL) {
printk(KERN_ERR "cciss: out of memory\n");
kfree(ld_buff);
return;
currentsd = kzalloc(sizeof(*currentsd) *
(CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
printk(KERN_ERR "cciss: out of memory\n");
goto out;
}
this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
ch = &ld_buff->LUNListLength[0];
num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
......@@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
/* adjust our table of devices */
for(i=0; i<num_luns; i++)
{
int devtype;
for (i = 0; i < num_luns; i++) {
/* for each physical lun, do an inquiry */
if (ld_buff->LUN[i][3] & 0xC0) continue;
memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff,
(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
(unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
/* Inquiry failed (msg printed already) */
devtype = 0; /* so we will skip this device. */
} else /* what kind of device is this? */
devtype = (inq_buff[0] & 0x1f);
switch (devtype)
continue; /* so we will skip this device. */
this_device->devtype = (inq_buff[0] & 0x1f);
this_device->bus = -1;
this_device->target = -1;
this_device->lun = -1;
memcpy(this_device->scsi3addr, scsi3addr, 8);
memcpy(this_device->vendor, &inq_buff[8],
sizeof(this_device->vendor));
memcpy(this_device->model, &inq_buff[16],
sizeof(this_device->model));
memcpy(this_device->revision, &inq_buff[32],
sizeof(this_device->revision));
memset(this_device->device_id, 0,
sizeof(this_device->device_id));
cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
this_device->device_id, sizeof(this_device->device_id));
switch (this_device->devtype)
{
case 0x05: /* CD-ROM */ {
......@@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
printk(KERN_INFO "cciss%d: %s ignored, "
"too many devices.\n", cntl_num,
scsi_device_type(devtype));
scsi_device_type(this_device->devtype));
break;
}
memcpy(&currentsd[ncurrent].scsi3addr[0],
&scsi3addr[0], 8);
currentsd[ncurrent].devtype = devtype;
currentsd[ncurrent].bus = -1;
currentsd[ncurrent].target = -1;
currentsd[ncurrent].lun = -1;
currentsd[ncurrent] = *this_device;
ncurrent++;
break;
default:
......@@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
out:
kfree(inq_buff);
kfree(ld_buff);
kfree(currentsd);
return;
}
......
......@@ -66,6 +66,10 @@ struct cciss_scsi_dev_t {
int devtype;
int bus, target, lun; /* as presented to the OS */
unsigned char scsi3addr[8]; /* as presented to the HW */
unsigned char device_id[16]; /* from inquiry pg. 0x83 */
unsigned char vendor[8]; /* bytes 8-15 of inquiry data */
unsigned char model[16]; /* bytes 16-31 of inquiry data */
unsigned char revision[4]; /* bytes 32-35 of inquiry data */
};
struct cciss_scsi_hba_t {
......
......@@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
&(hba[i]->cmd_pool_dhandle));
hba[i]->cmd_pool_bits = kcalloc(
(NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long),
DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long),
GFP_KERNEL);
if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
......
This diff is collapsed.
......@@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo)
BUG_ON(lo->magic != LO_MAGIC);
lo->pid = current->pid;
ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr);
ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
if (ret) {
printk(KERN_ERR "nbd: sysfs_create_file failed!");
return ret;
......@@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo)
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr);
sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
return 0;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment