Commit 17829c5a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.6-2020-03-13' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A few fixes that should go into this release. This contains:

   - Fix for a corruption issue with the s390 dasd driver (Stefan)

   - Fixup/improvement for the flush insertion change that we had in
     this series (Ming)

   - Fix for the partition suppor for host aware zoned devices
     (Shin'ichiro)

   - Fix incorrect blk-iocost comparison (Tejun)

  The diffstat looks large, but that's a) mostly dasd, and b) the flush
  fix from Ming adds a big comment"

* tag 'block-5.6-2020-03-13' of git://git.kernel.dk/linux-block:
  block: Fix partition support for host aware zoned block devices
  blk-mq: insert flush request to the front of dispatch queue
  s390/dasd: fix data corruption for thin provisioned devices
  blk-iocost: fix incorrect vtime comparison in iocg_is_idle()
parents d3656129 b53df2e7
......@@ -1318,7 +1318,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg)
return false;
/* is something in flight? */
if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
return false;
return true;
......
......@@ -398,6 +398,28 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(
* hctx->dispatch) directly and there is at most one in-flight
* flush request for each hw queue, so it doesn't matter to add
* flush request to tail or front of the dispatch queue.
*
* Secondly in case of NCQ, flush request belongs to non-NCQ
* command, and queueing it will fail when there is any
* in-flight normal IO request(NCQ command). When adding flush
* rq to the front of hctx->dispatch, it is easier to introduce
* extra time to flush rq's latency because of S_SCHED_RESTART
* compared with adding to the tail of dispatch queue, then
* chance of flush merge is increased, and less flush requests
* will be issued to controller. It is observed that ~10% time
* is saved in blktests block/004 on disk attached to AHCI/NCQ
* drive when adding flush rq to the front of hctx->dispatch.
*
* Simply queue flush rq to the front of hctx->dispatch so that
* intensive flush workloads can benefit in case of NCQ HW.
*/
at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
}
......
......@@ -301,6 +301,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
}
EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
/**
* disk_has_partitions
* @disk: gendisk of interest
*
* Walk through the partition table and check if valid partition exists.
*
* CONTEXT:
* Don't care.
*
* RETURNS:
* True if the gendisk has at least one valid non-zero size partition.
* Otherwise false.
*/
bool disk_has_partitions(struct gendisk *disk)
{
struct disk_part_tbl *ptbl;
int i;
bool ret = false;
rcu_read_lock();
ptbl = rcu_dereference(disk->part_tbl);
/* Iterate partitions skipping the whole device at index 0 */
for (i = 1; i < ptbl->len; i++) {
if (rcu_dereference(ptbl->part[i])) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(disk_has_partitions);
/*
* Can be deleted altogether. Later.
*
......
......@@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void)
(unsigned long) block);
INIT_LIST_HEAD(&block->ccw_queue);
spin_lock_init(&block->queue_lock);
INIT_LIST_HEAD(&block->format_list);
spin_lock_init(&block->format_lock);
timer_setup(&block->timer, dasd_block_timeout, 0);
spin_lock_init(&block->profile.lock);
......@@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (dasd_ese_needs_format(cqr->block, irb)) {
if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
device->discipline->ese_read(cqr);
device->discipline->ese_read(cqr, irb);
cqr->status = DASD_CQR_SUCCESS;
cqr->stopclk = now;
dasd_device_clear_timer(device);
dasd_schedule_device_bh(device);
return;
}
fcqr = device->discipline->ese_format(device, cqr);
fcqr = device->discipline->ese_format(device, cqr, irb);
if (IS_ERR(fcqr)) {
if (PTR_ERR(fcqr) == -EINVAL) {
cqr->status = DASD_CQR_ERROR;
return;
}
/*
* If we can't format now, let the request go
* one extra round. Maybe we can format later.
*/
cqr->status = DASD_CQR_QUEUED;
dasd_schedule_device_bh(device);
return;
} else {
fcqr->status = DASD_CQR_QUEUED;
cqr->status = DASD_CQR_QUEUED;
......@@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
blk_status_t error = BLK_STS_OK;
unsigned int proc_bytes;
int status;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
proc_bytes = cqr->proc_bytes;
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
error = errno_to_blk_status(status);
......@@ -2782,9 +2792,20 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
if (error) {
blk_mq_end_request(req, error);
blk_mq_run_hw_queues(req->q, true);
} else {
/*
* Partial completed requests can happen with ESE devices.
* During read we might have gotten a NRF error and have to
* complete a request partially.
*/
if (proc_bytes) {
blk_update_request(req, BLK_STS_OK,
blk_rq_bytes(req) - proc_bytes);
blk_mq_requeue_request(req, true);
} else {
blk_mq_complete_request(req);
}
}
}
/*
......
......@@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
geo->head |= head;
}
/*
* calculate failing track from sense data depending if
* it is an EAV device or not
*/
static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device,
sector_t *track)
{
struct dasd_eckd_private *private = device->private;
u8 *sense = NULL;
u32 cyl;
u8 head;
sense = dasd_get_sense(irb);
if (!sense) {
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"ESE error no sense data\n");
return -EINVAL;
}
if (!(sense[27] & DASD_SENSE_BIT_2)) {
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"ESE error no valid track data\n");
return -EINVAL;
}
if (sense[27] & DASD_SENSE_BIT_3) {
/* enhanced addressing */
cyl = sense[30] << 20;
cyl |= (sense[31] & 0xF0) << 12;
cyl |= sense[28] << 8;
cyl |= sense[29];
} else {
cyl = sense[29] << 8;
cyl |= sense[30];
}
head = sense[31] & 0x0F;
*track = cyl * private->rdc_data.trk_per_cyl + head;
return 0;
}
static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
struct dasd_device *device)
{
......@@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base,
0, NULL);
}
static bool test_and_set_format_track(struct dasd_format_entry *to_format,
struct dasd_block *block)
{
struct dasd_format_entry *format;
unsigned long flags;
bool rc = false;
spin_lock_irqsave(&block->format_lock, flags);
list_for_each_entry(format, &block->format_list, list) {
if (format->track == to_format->track) {
rc = true;
goto out;
}
}
list_add_tail(&to_format->list, &block->format_list);
out:
spin_unlock_irqrestore(&block->format_lock, flags);
return rc;
}
static void clear_format_track(struct dasd_format_entry *format,
struct dasd_block *block)
{
unsigned long flags;
spin_lock_irqsave(&block->format_lock, flags);
list_del_init(&format->list);
spin_unlock_irqrestore(&block->format_lock, flags);
}
/*
* Callback function to free ESE format requests.
*/
......@@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data)
{
struct dasd_device *device = cqr->startdev;
struct dasd_eckd_private *private = device->private;
struct dasd_format_entry *format = data;
clear_format_track(format, cqr->basedev->block);
private->count--;
dasd_ffree_request(cqr, device);
}
static struct dasd_ccw_req *
dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
struct irb *irb)
{
struct dasd_eckd_private *private;
struct dasd_format_entry *format;
struct format_data_t fdata;
unsigned int recs_per_trk;
struct dasd_ccw_req *fcqr;
......@@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
struct request *req;
sector_t first_trk;
sector_t last_trk;
sector_t curr_trk;
int rc;
req = cqr->callback_data;
base = cqr->block->base;
block = cqr->block;
base = block->base;
private = base->private;
block = base->block;
blksize = block->bp_block;
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
format = &startdev->format_entry;
first_trk = blk_rq_pos(req) >> block->s2b_shift;
sector_div(first_trk, recs_per_trk);
last_trk =
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
sector_div(last_trk, recs_per_trk);
rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
if (rc)
return ERR_PTR(rc);
fdata.start_unit = first_trk;
fdata.stop_unit = last_trk;
if (curr_trk < first_trk || curr_trk > last_trk) {
DBF_DEV_EVENT(DBF_WARNING, startdev,
"ESE error track %llu not within range %llu - %llu\n",
curr_trk, first_trk, last_trk);
return ERR_PTR(-EINVAL);
}
format->track = curr_trk;
/* test if track is already in formatting by another thread */
if (test_and_set_format_track(format, block))
return ERR_PTR(-EEXIST);
fdata.start_unit = curr_trk;
fdata.stop_unit = curr_trk;
fdata.blksize = blksize;
fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0;
......@@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
return fcqr;
fcqr->callback = dasd_eckd_ese_format_cb;
fcqr->callback_data = (void *) format;
return fcqr;
}
......@@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr)
/*
* When data is read from an unformatted area of an ESE volume, this function
* returns zeroed data and thereby mimics a read of zero data.
*
* The first unformatted track is the one that got the NRF error, the address is
* encoded in the sense data.
*
* All tracks before have returned valid data and should not be touched.
* All tracks after the unformatted track might be formatted or not. This is
* currently not known, remember the processed data and return the remainder of
* the request to the blocklayer in __dasd_cleanup_cqr().
*/
static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr)
static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
{
struct dasd_eckd_private *private;
sector_t first_trk, last_trk;
sector_t first_blk, last_blk;
unsigned int blksize, off;
unsigned int recs_per_trk;
struct dasd_device *base;
struct req_iterator iter;
struct dasd_block *block;
unsigned int skip_block;
unsigned int blk_count;
struct request *req;
struct bio_vec bv;
sector_t curr_trk;
sector_t end_blk;
char *dst;
int rc;
req = (struct request *) cqr->callback_data;
base = cqr->block->base;
blksize = base->block->bp_block;
block = cqr->block;
private = base->private;
skip_block = 0;
blk_count = 0;
recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift;
sector_div(first_trk, recs_per_trk);
last_trk = last_blk =
(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
sector_div(last_trk, recs_per_trk);
rc = dasd_eckd_track_from_irb(irb, base, &curr_trk);
if (rc)
return rc;
/* sanity check if the current track from sense data is valid */
if (curr_trk < first_trk || curr_trk > last_trk) {
DBF_DEV_EVENT(DBF_WARNING, base,
"ESE error track %llu not within range %llu - %llu\n",
curr_trk, first_trk, last_trk);
return -EINVAL;
}
/*
* if not the first track got the NRF error we have to skip over valid
* blocks
*/
if (curr_trk != first_trk)
skip_block = curr_trk * recs_per_trk - first_blk;
/* we have no information beyond the current track */
end_blk = (curr_trk + 1) * recs_per_trk;
rq_for_each_segment(bv, req, iter) {
dst = page_address(bv.bv_page) + bv.bv_offset;
for (off = 0; off < bv.bv_len; off += blksize) {
if (dst && rq_data_dir(req) == READ) {
if (first_blk + blk_count >= end_blk) {
cqr->proc_bytes = blk_count * blksize;
return 0;
}
if (dst && !skip_block) {
dst += off;
memset(dst, 0, blksize);
} else {
skip_block--;
}
blk_count++;
}
}
return 0;
}
/*
......
......@@ -187,6 +187,7 @@ struct dasd_ccw_req {
void (*callback)(struct dasd_ccw_req *, void *data);
void *callback_data;
unsigned int proc_bytes; /* bytes for partial completion */
};
/*
......@@ -387,8 +388,9 @@ struct dasd_discipline {
int (*ext_pool_warn_thrshld)(struct dasd_device *);
int (*ext_pool_oos)(struct dasd_device *);
int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *);
struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *);
void (*ese_read)(struct dasd_ccw_req *);
struct dasd_ccw_req *(*ese_format)(struct dasd_device *,
struct dasd_ccw_req *, struct irb *);
int (*ese_read)(struct dasd_ccw_req *, struct irb *);
};
extern struct dasd_discipline *dasd_diag_discipline_pointer;
......@@ -474,6 +476,11 @@ struct dasd_profile {
spinlock_t lock;
};
struct dasd_format_entry {
struct list_head list;
sector_t track;
};
struct dasd_device {
/* Block device stuff. */
struct dasd_block *block;
......@@ -539,6 +546,7 @@ struct dasd_device {
struct dentry *debugfs_dentry;
struct dentry *hosts_dentry;
struct dasd_profile profile;
struct dasd_format_entry format_entry;
};
struct dasd_block {
......@@ -564,6 +572,9 @@ struct dasd_block {
struct dentry *debugfs_dentry;
struct dasd_profile profile;
struct list_head format_list;
spinlock_t format_lock;
};
struct dasd_attention_data {
......
......@@ -245,18 +245,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk)
!(disk->flags & GENHD_FL_NO_PART_SCAN);
}
static inline bool disk_has_partitions(struct gendisk *disk)
{
bool ret = false;
rcu_read_lock();
if (rcu_dereference(disk->part_tbl)->len > 1)
ret = true;
rcu_read_unlock();
return ret;
}
static inline dev_t disk_devt(struct gendisk *disk)
{
return MKDEV(disk->major, disk->first_minor);
......@@ -298,6 +286,7 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter);
extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
sector_t sector);
bool disk_has_partitions(struct gendisk *disk);
/*
* Macros to operate on percpu disk statistics:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment