Commit 85b2148a authored by Jens Axboe

[PATCH] deadline scheduler

This introduces the deadline-ioscheduler, making it the default.  2nd
patch coming that deletes elevator_linus in a minute.

This one has read_expire at 500ms, and writes_starved at 2.
parent 650e56ee
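For quick reference, the defaults this patch ships with are plain compile-time
statics at the top of the new deadline-iosched.c (as far as this patch goes
there is no runtime knob, so tuning them means editing the values and
rebuilding):

	static int read_expire = HZ / 2;	/* reads expire after 500ms */
	static int fifo_batch = 64;		/* batch budget: 4 seeks, or 64 contiguous requests */
	static int seek_cost = 16;		/* one seek costs as much as 16 contiguous requests */
	static int writes_starved = 2;		/* reads may starve writes at most twice in a row */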
drivers/block/Makefile
@@ -9,9 +9,9 @@
 #
 
 export-objs	:= elevator.o ll_rw_blk.o loop.o genhd.o acsi.o \
-			block_ioctl.o
+			block_ioctl.o deadline-iosched.o
 
-obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o
+obj-y	:= elevator.o ll_rw_blk.o blkpg.o genhd.o block_ioctl.o deadline-iosched.o
 
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
...
/*
* linux/drivers/block/deadline-iosched.c
*
* Deadline i/o scheduler.
*
* Copyright (C) 2002 Jens Axboe <axboe@suse.de>
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/blk.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
/*
* feel free to try other values :-). read_expire value is the timeout for
* reads, our goal is to start a request "around" the time when it expires.
* fifo_batch is how many steps along the sorted list we will take when the
* front fifo request expires.
*/
static int read_expire = HZ / 2; /* 500ms start timeout */
static int fifo_batch = 64; /* 4 seeks, or 64 contig */
static int seek_cost = 16; /* seek is 16 times more expensive */
/*
* how many times reads are allowed to starve writes
*/
static int writes_starved = 2;
static const int deadline_hash_shift = 8;
#define DL_HASH_BLOCK(sec) ((sec) >> 3)
#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
#define DL_HASH_ENTRIES (1 << deadline_hash_shift)
#define DL_INVALIDATE_HASH(dd) \
do { \
if (!++(dd)->hash_valid_count) \
(dd)->hash_valid_count = 1; \
} while (0)
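/*
 * The hash is invalidated by generation counting: each hashed request is
 * tagged with the queue's current hash_valid_count.  On a barrier insert,
 * DL_INVALIDATE_HASH simply bumps that counter (skipping 0, which means
 * "not hashed"), so every existing entry goes stale at once without walking
 * the hash buckets; stale entries are pruned lazily when
 * deadline_find_hash() runs into them.
 */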
struct deadline_data {
/*
* run time data
*/
struct list_head sort_list[2];	/* sorted lists */
struct list_head read_fifo; /* fifo list */
struct list_head *dispatch; /* driver dispatch queue */
struct list_head *hash; /* request hash */
sector_t last_sector; /* last sector sent to drive */
unsigned long hash_valid_count; /* barrier hash count */
unsigned int starved; /* writes starved */
/*
* settings that change how the i/o scheduler behaves
*/
unsigned int fifo_batch;
unsigned long read_expire;
unsigned int seek_cost;
unsigned int writes_starved;
};
/*
* pre-request data.
*/
struct deadline_rq {
struct list_head fifo;
struct list_head hash;
unsigned long hash_valid_count;
struct request *request;
unsigned long expires;
};
static kmem_cache_t *drq_pool;
#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
/*
* rq hash
*/
static inline void __deadline_del_rq_hash(struct deadline_rq *drq)
{
drq->hash_valid_count = 0;
list_del_init(&drq->hash);
}
#define ON_HASH(drq) (drq)->hash_valid_count
static inline void deadline_del_rq_hash(struct deadline_rq *drq)
{
if (ON_HASH(drq))
__deadline_del_rq_hash(drq);
}
static inline void
deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq)
{
struct request *rq = drq->request;
BUG_ON(ON_HASH(drq));
drq->hash_valid_count = dd->hash_valid_count;
list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector + rq->nr_sectors)]);
}
#define list_entry_hash(ptr) list_entry((ptr), struct deadline_rq, hash)
static struct request *
deadline_find_hash(struct deadline_data *dd, sector_t offset)
{
struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
struct list_head *entry, *next = hash_list->next;
struct deadline_rq *drq;
struct request *rq = NULL;
while ((entry = next) != hash_list) {
next = entry->next;
drq = list_entry_hash(entry);
BUG_ON(!drq->hash_valid_count);
if (!rq_mergeable(drq->request)
|| drq->hash_valid_count != dd->hash_valid_count) {
__deadline_del_rq_hash(drq);
continue;
}
if (drq->request->sector + drq->request->nr_sectors == offset) {
rq = drq->request;
break;
}
}
return rq;
}
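/*
 * Merge strategy: try the cached last_merge hint first, then probe the hash
 * for a back merge (requests are hashed on their end sector, so a lookup on
 * bio->bi_sector finds a request ending exactly where the bio starts).
 * Front merges can't be found through the hash, so they are picked up by
 * the reverse scan of the sort list below, which also determines the
 * insertion point for a new request.
 */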
static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
struct deadline_data *dd = q->elevator.elevator_data;
const int data_dir = bio_data_dir(bio);
struct list_head *entry, *sort_list;
struct deadline_rq *drq;
struct request *__rq;
int ret = ELEVATOR_NO_MERGE;
/*
* try last_merge to avoid going to hash
*/
ret = elv_try_last_merge(q, req, bio);
if (ret != ELEVATOR_NO_MERGE)
goto out;
/*
* see if the merge hash can satisfy a back merge
*/
if ((__rq = deadline_find_hash(dd, bio->bi_sector))) {
BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
if (elv_rq_merge_ok(__rq, bio)) {
*req = __rq;
q->last_merge = &__rq->queuelist;
ret = ELEVATOR_BACK_MERGE;
goto out_ret;
}
}
entry = sort_list = &dd->sort_list[data_dir];
while ((entry = entry->prev) != sort_list) {
__rq = list_entry_rq(entry);
drq = RQ_DATA(__rq);
BUG_ON(__rq->flags & REQ_STARTED);
if (!(__rq->flags & REQ_CMD))
continue;
if (!*req && bio_rq_in_between(bio, __rq, sort_list))
*req = __rq;
if (__rq->flags & REQ_BARRIER)
break;
/*
* checking for a front merge, hash will miss those
*/
if (__rq->sector - bio_sectors(bio) == bio->bi_sector) {
ret = elv_try_merge(__rq, bio);
if (ret != ELEVATOR_NO_MERGE) {
*req = __rq;
q->last_merge = &__rq->queuelist;
break;
}
}
}
out:
if (ret != ELEVATOR_NO_MERGE) {
struct deadline_rq *drq = RQ_DATA(*req);
deadline_del_rq_hash(drq);
deadline_add_rq_hash(dd, drq);
}
out_ret:
return ret;
}
static void
deadline_merge_request(request_queue_t *q, struct request *req, struct request *next)
{
struct deadline_data *dd = q->elevator.elevator_data;
struct deadline_rq *drq = RQ_DATA(req);
struct deadline_rq *dnext = RQ_DATA(next);
BUG_ON(!drq);
BUG_ON(!dnext);
deadline_del_rq_hash(drq);
deadline_add_rq_hash(dd, drq);
/*
* if dnext expires before drq, assign its expire time to drq
* and move into dnext position (dnext will be deleted) in fifo
*/
if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
if (time_before(dnext->expires, drq->expires)) {
list_move(&drq->fifo, &dnext->fifo);
drq->expires = dnext->expires;
}
}
}
/*
* move request from sort list to dispatch queue. maybe remove from rq hash
* here too?
*/
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
struct deadline_rq *drq = RQ_DATA(rq);
list_move_tail(&rq->queuelist, dd->dispatch);
list_del_init(&drq->fifo);
}
/*
* move along sort list and move entries to dispatch queue, starting from rq
*/
static void deadline_move_requests(struct deadline_data *dd, struct request *rq)
{
struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)];
sector_t last_sec = dd->last_sector;
int batch_count = dd->fifo_batch;
do {
struct list_head *nxt = rq->queuelist.next;
/*
* take it off the sort and fifo list, move
* to dispatch queue
*/
deadline_move_to_dispatch(dd, rq);
if (rq->sector == last_sec)
batch_count--;
else
batch_count -= dd->seek_cost;
if (nxt == sort_head)
break;
last_sec = rq->sector + rq->nr_sectors;
rq = list_entry_rq(nxt);
} while (batch_count > 0);
}
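/*
 * With the default tunables, one call to deadline_move_requests() starts
 * with a budget of fifo_batch (64): a request that begins where the
 * previous one ended costs 1, anything else costs seek_cost (16).  So a
 * batch covers up to 64 contiguous requests, but only 4 requests that each
 * imply a seek.
 */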
/*
* returns 0 if there are no expired reads on the fifo, 1 otherwise
*/
#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
static inline int deadline_check_fifo(struct deadline_data *dd)
{
struct deadline_rq *drq;
if (list_empty(&dd->read_fifo))
return 0;
drq = list_entry_fifo(dd->read_fifo.next);
if (time_before(jiffies, drq->expires))
return 0;
return 1;
}
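/*
 * Dispatch policy: requests already on the dispatch queue are handed to the
 * driver first.  Otherwise reads are preferred, taken either from the
 * oldest expired entry on the read fifo or from the front of the read sort
 * list.  Each time reads are chosen while writes are waiting, dd->starved
 * is bumped; once writes have been passed over writes_starved times (2 by
 * default), a batch of writes is dispatched instead and the counter resets.
 */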
static struct request *deadline_next_request(request_queue_t *q)
{
struct deadline_data *dd = q->elevator.elevator_data;
struct deadline_rq *drq;
struct list_head *nxt;
struct request *rq;
int writes;
/*
* if still requests on the dispatch queue, just grab the first one
*/
if (!list_empty(&q->queue_head)) {
dispatch:
rq = list_entry_rq(q->queue_head.next);
dd->last_sector = rq->sector + rq->nr_sectors;
return rq;
}
writes = !list_empty(&dd->sort_list[WRITE]);
/*
* if we have expired entries on the fifo list, move some to dispatch
*/
if (deadline_check_fifo(dd)) {
if (writes && (dd->starved++ >= dd->writes_starved))
goto dispatch_writes;
nxt = dd->read_fifo.next;
drq = list_entry_fifo(nxt);
deadline_move_requests(dd, drq->request);
goto dispatch;
}
if (!list_empty(&dd->sort_list[READ])) {
if (writes && (dd->starved++ >= dd->writes_starved))
goto dispatch_writes;
nxt = dd->sort_list[READ].next;
deadline_move_requests(dd, list_entry_rq(nxt));
goto dispatch;
}
/*
* either there are no reads expired or on sort list, or the reads
* have starved writes for too long. dispatch some writes
*/
if (writes) {
dispatch_writes:
nxt = dd->sort_list[WRITE].next;
deadline_move_requests(dd, list_entry_rq(nxt));
dd->starved = 0;
goto dispatch;
}
BUG_ON(!list_empty(&dd->sort_list[READ]));
BUG_ON(writes);
return NULL;
}
static void
deadline_add_request(request_queue_t *q, struct request *rq, struct list_head *insert_here)
{
struct deadline_data *dd = q->elevator.elevator_data;
struct deadline_rq *drq = RQ_DATA(rq);
const int data_dir = rq_data_dir(rq);
/*
* flush hash on barrier insert, so as not to allow merges before a
* barrier.
*/
if (unlikely(rq->flags & REQ_BARRIER)) {
DL_INVALIDATE_HASH(dd);
q->last_merge = NULL;
}
/*
* add to sort list
*/
if (!insert_here)
insert_here = dd->sort_list[data_dir].prev;
list_add(&rq->queuelist, insert_here);
if (unlikely(!(rq->flags & REQ_CMD)))
return;
if (rq_mergeable(rq)) {
deadline_add_rq_hash(dd, drq);
if (!q->last_merge)
q->last_merge = &rq->queuelist;
}
if (data_dir == READ) {
/*
* set expire time and add to fifo list
*/
drq->expires = jiffies + dd->read_expire;
list_add_tail(&drq->fifo, &dd->read_fifo);
}
}
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
struct deadline_rq *drq = RQ_DATA(rq);
if (drq) {
list_del_init(&drq->fifo);
deadline_del_rq_hash(drq);
}
}
static int deadline_queue_empty(request_queue_t *q)
{
struct deadline_data *dd = q->elevator.elevator_data;
if (!list_empty(&q->queue_head) || !list_empty(&dd->sort_list[READ])
|| !list_empty(&dd->sort_list[WRITE]))
return 0;
BUG_ON(!list_empty(&dd->read_fifo));
return 1;
}
static struct list_head *
deadline_get_sort_head(request_queue_t *q, struct request *rq)
{
struct deadline_data *dd = q->elevator.elevator_data;
return &dd->sort_list[rq_data_dir(rq)];
}
static void deadline_exit(request_queue_t *q, elevator_t *e)
{
struct deadline_data *dd = e->elevator_data;
struct deadline_rq *drq;
struct request *rq;
int i;
BUG_ON(!list_empty(&dd->read_fifo));
BUG_ON(!list_empty(&dd->sort_list[READ]));
BUG_ON(!list_empty(&dd->sort_list[WRITE]));
for (i = READ; i <= WRITE; i++) {
struct request_list *rl = &q->rq[i];
struct list_head *entry = &rl->free;
if (list_empty(&rl->free))
continue;
while ((entry = entry->next) != &rl->free) {
rq = list_entry_rq(entry);
if ((drq = RQ_DATA(rq)) == NULL)
continue;
rq->elevator_private = NULL;
kmem_cache_free(drq_pool, drq);
}
}
kfree(dd->hash);
kfree(dd);
}
/*
* initialize elevator private data (deadline_data), and alloc a drq for
* each request on the free lists
*/
static int deadline_init(request_queue_t *q, elevator_t *e)
{
struct deadline_data *dd;
struct deadline_rq *drq;
struct request *rq;
int i, ret = 0;
if (!drq_pool)
return -ENOMEM;
dd = kmalloc(sizeof(*dd), GFP_KERNEL);
if (!dd)
return -ENOMEM;
memset(dd, 0, sizeof(*dd));
dd->hash = kmalloc(sizeof(struct list_head)*DL_HASH_ENTRIES,GFP_KERNEL);
if (!dd->hash) {
kfree(dd);
return -ENOMEM;
}
for (i = 0; i < DL_HASH_ENTRIES; i++)
INIT_LIST_HEAD(&dd->hash[i]);
INIT_LIST_HEAD(&dd->read_fifo);
INIT_LIST_HEAD(&dd->sort_list[READ]);
INIT_LIST_HEAD(&dd->sort_list[WRITE]);
dd->dispatch = &q->queue_head;
dd->fifo_batch = fifo_batch;
dd->read_expire = read_expire;
dd->seek_cost = seek_cost;
dd->hash_valid_count = 1;
dd->writes_starved = writes_starved;
e->elevator_data = dd;
for (i = READ; i <= WRITE; i++) {
struct request_list *rl = &q->rq[i];
struct list_head *entry = &rl->free;
if (list_empty(&rl->free))
continue;
while ((entry = entry->next) != &rl->free) {
rq = list_entry_rq(entry);
drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
if (!drq) {
ret = -ENOMEM;
break;
}
memset(drq, 0, sizeof(*drq));
INIT_LIST_HEAD(&drq->fifo);
INIT_LIST_HEAD(&drq->hash);
drq->request = rq;
rq->elevator_private = drq;
}
}
if (ret)
deadline_exit(q, e);
return ret;
}
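/*
 * Note that deadline_init() preallocates a struct deadline_rq for every
 * request on the queue's read and write free lists and attaches it via
 * rq->elevator_private, so nothing needs to be allocated on the I/O
 * submission path; deadline_exit() walks the same free lists to release
 * them again.
 */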
static int __init deadline_slab_setup(void)
{
drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!drq_pool)
panic("deadline: can't init slab pool\n");
return 0;
}
module_init(deadline_slab_setup);
elevator_t iosched_deadline = {
.elevator_merge_fn = deadline_merge,
.elevator_merge_req_fn = deadline_merge_request,
.elevator_next_req_fn = deadline_next_request,
.elevator_add_req_fn = deadline_add_request,
.elevator_remove_req_fn = deadline_remove_request,
.elevator_queue_empty_fn = deadline_queue_empty,
.elevator_get_sort_head_fn = deadline_get_sort_head,
.elevator_init_fn = deadline_init,
.elevator_exit_fn = deadline_exit,
};
EXPORT_SYMBOL(iosched_deadline);
drivers/block/ll_rw_blk.c
@@ -1175,7 +1175,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
 	if (blk_init_free_list(q))
 		return -ENOMEM;
 
-	if ((ret = elevator_init(q, &q->elevator, elevator_linus))) {
+	if ((ret = elevator_init(q, &q->elevator, iosched_deadline))) {
 		blk_cleanup_queue(q);
 		return ret;
 	}
...
include/linux/elevator.h
@@ -59,6 +59,12 @@ extern elevator_t elevator_linus;
 #define elv_linus_sequence(rq)	((long)(rq)->elevator_private)
 
 #define ELV_LINUS_SEEK_COST	16
 
+/*
+ * deadline i/o scheduler. uses request time outs to prevent indefinite
+ * starvation
+ */
+extern elevator_t iosched_deadline;
+
 /*
  * use the /proc/iosched interface, all the below is history ->
  */
...