/*
 *  linux/drivers/block/deadline-iosched.c
 *
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/blk.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>

/*
 * feel free to try other values :-). read_expire value is the timeout for
 * reads, our goal is to start a request "around" the time when it expires.
 * fifo_batch is how many steps along the sorted list we will take when the
 * front fifo request expires.
 */
static int read_expire = HZ / 2;	/* 500ms start timeout */
static int fifo_batch = 32;		/* 2 seeks, or 32 contig */
static int seek_cost = 16;		/* seek is 16 times more expensive */

/*
 * how many times reads are allowed to starve writes
 */
static int writes_starved = 2;

static const int deadline_hash_shift = 8;
#define DL_HASH_BLOCK(sec)	((sec) >> 3)
#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)

#define DL_INVALIDATE_HASH(dd)				\
	do {						\
		if (!++(dd)->hash_valid_count)		\
			(dd)->hash_valid_count = 1;	\
	} while (0)

struct deadline_data {
	/*
	 * run time data
	 */
	struct list_head sort_list[2];	/* sorted list */
	struct list_head read_fifo;	/* fifo list */
	struct list_head *dispatch;	/* driver dispatch queue */
	struct list_head *hash;		/* request hash */
	sector_t last_sector;		/* last sector sent to drive */
	unsigned long hash_valid_count;	/* barrier hash count */
	unsigned int starved;		/* writes starved */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	unsigned int fifo_batch;
	unsigned long read_expire;
	unsigned int seek_cost;
	unsigned int writes_starved;
};
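/*
 * Note on hash_valid_count: a barrier does not walk the hash to unlink
 * every request; DL_INVALIDATE_HASH just bumps the queue-wide counter
 * (skipping 0, which means "not hashed").  Requests stamped with an
 * older count are stale and are pruned lazily the next time
 * deadline_find_hash() walks their bucket.
 */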
/*
 * per-request data.
 */
struct deadline_rq {
	struct list_head fifo;
	struct list_head hash;
	unsigned long hash_valid_count;
	struct request *request;
	unsigned long expires;
};

static kmem_cache_t *drq_pool;

#define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)

/*
 * rq hash
 */
static inline void __deadline_del_rq_hash(struct deadline_rq *drq)
{
	drq->hash_valid_count = 0;
	list_del_init(&drq->hash);
}

#define ON_HASH(drq)	(drq)->hash_valid_count

static inline void deadline_del_rq_hash(struct deadline_rq *drq)
{
	if (ON_HASH(drq))
		__deadline_del_rq_hash(drq);
}

static inline void
deadline_add_rq_hash(struct deadline_data *dd, struct deadline_rq *drq)
{
	struct request *rq = drq->request;

	BUG_ON(ON_HASH(drq));

	drq->hash_valid_count = dd->hash_valid_count;
	list_add(&drq->hash, &dd->hash[DL_HASH_FN(rq->sector + rq->nr_sectors)]);
}

#define list_entry_hash(ptr)	list_entry((ptr), struct deadline_rq, hash)

static struct request *
deadline_find_hash(struct deadline_data *dd, sector_t offset)
{
	struct list_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
	struct list_head *entry, *next = hash_list->next;
	struct deadline_rq *drq;
	struct request *rq = NULL;

	while ((entry = next) != hash_list) {
		next = entry->next;

		drq = list_entry_hash(entry);

		BUG_ON(!drq->hash_valid_count);

		if (!rq_mergeable(drq->request) ||
		    drq->hash_valid_count != dd->hash_valid_count) {
			__deadline_del_rq_hash(drq);
			continue;
		}

		if (drq->request->sector + drq->request->nr_sectors == offset) {
			rq = drq->request;
			break;
		}
	}

	return rq;
}

static sector_t deadline_get_last_sector(struct deadline_data *dd)
{
	sector_t last_sec = dd->last_sector;

	/*
	 * if dispatch is non-empty, disregard last_sector and check last one
	 */
	if (!list_empty(dd->dispatch)) {
		struct request *__rq = list_entry_rq(dd->dispatch->prev);

		last_sec = __rq->sector + __rq->nr_sectors;
	}

	return last_sec;
}
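/*
 * Merging strategy: the hash is keyed on a request's end sector, so a
 * back merge (a bio starting where a request ends) is a single hash
 * probe.  Front merges have no such index and are only caught during
 * the back-to-front scan of the sort list below.
 */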
static int
deadline_merge(request_queue_t *q, struct list_head **insert, struct bio *bio)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	const int data_dir = bio_data_dir(bio);
	struct list_head *entry, *sort_list;
	struct request *__rq;
	int ret = ELEVATOR_NO_MERGE;

	/*
	 * try last_merge to avoid going to hash
	 */
	ret = elv_try_last_merge(q, bio);
	if (ret != ELEVATOR_NO_MERGE) {
		*insert = q->last_merge;
		goto out;
	}

	/*
	 * see if the merge hash can satisfy a back merge
	 */
	if ((__rq = deadline_find_hash(dd, bio->bi_sector))) {
		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);

		if (elv_rq_merge_ok(__rq, bio)) {
			*insert = &__rq->queuelist;
			ret = ELEVATOR_BACK_MERGE;
			goto out;
		}
	}

	/*
	 * scan list from back to find insertion point.
	 */
	entry = sort_list = &dd->sort_list[data_dir];
	while ((entry = entry->prev) != sort_list) {
		__rq = list_entry_rq(entry);

		BUG_ON(__rq->flags & REQ_STARTED);

		if (!(__rq->flags & REQ_CMD))
			continue;

		/*
		 * it's not necessary to break here, and in fact it could make
		 * us lose a front merge. empirical evidence shows this to
		 * be a big waste of cycles though, so quit scanning
		 */
		if (!*insert && bio_rq_in_between(bio, __rq, sort_list)) {
			*insert = &__rq->queuelist;
			break;
		}

		if (__rq->flags & REQ_BARRIER)
			break;

		/*
		 * checking for a front merge, hash will miss those
		 */
		if (__rq->sector - bio_sectors(bio) == bio->bi_sector) {
			ret = elv_try_merge(__rq, bio);
			if (ret != ELEVATOR_NO_MERGE) {
				*insert = &__rq->queuelist;
				break;
			}
		}
	}

	/*
	 * no insertion point found, check the very front
	 */
	if (!*insert && !list_empty(sort_list)) {
		__rq = list_entry_rq(sort_list->next);

		if (bio->bi_sector + bio_sectors(bio) < __rq->sector &&
		    bio->bi_sector > deadline_get_last_sector(dd))
			*insert = sort_list;
	}

out:
	return ret;
}

static void deadline_merged_request(request_queue_t *q, struct request *req)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq = RQ_DATA(req);

	deadline_del_rq_hash(drq);
	deadline_add_rq_hash(dd, drq);

	q->last_merge = &req->queuelist;
}

static void
deadline_merge_request(request_queue_t *q, struct request *req,
		       struct request *next)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq = RQ_DATA(req);
	struct deadline_rq *dnext = RQ_DATA(next);

	BUG_ON(!drq);
	BUG_ON(!dnext);

	deadline_del_rq_hash(drq);
	deadline_add_rq_hash(dd, drq);

	/*
	 * if dnext expires before drq, assign its expire time to drq
	 * and move into dnext position (dnext will be deleted) in fifo
	 */
	if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
		if (time_before(dnext->expires, drq->expires)) {
			list_move(&drq->fifo, &dnext->fifo);
			drq->expires = dnext->expires;
		}
	}
}

/*
 * move request from sort list to dispatch queue. maybe remove from rq hash
 * here too?
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);

	list_move_tail(&rq->queuelist, dd->dispatch);
	list_del_init(&drq->fifo);
}
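/*
 * Batch accounting below: a request that does not start where the last
 * one ended costs seek_cost points, a contiguous one costs one point
 * per 256 sectors (rounded up).  E.g. with the defaults (fifo_batch=32,
 * seek_cost=16) a batch covers roughly 2 seeky requests, or up to 32
 * contiguous requests of 256 sectors or less.
 */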
/*
 * move along sort list and move entries to dispatch queue, starting from rq
 */
static void
deadline_move_requests(struct deadline_data *dd, struct request *rq)
{
	struct list_head *sort_head = &dd->sort_list[rq_data_dir(rq)];
	sector_t last_sec = deadline_get_last_sector(dd);
	int batch_count = dd->fifo_batch;

	do {
		struct list_head *nxt = rq->queuelist.next;
		int this_rq_cost;

		/*
		 * take it off the sort and fifo list, move
		 * to dispatch queue
		 */
		deadline_move_to_dispatch(dd, rq);

		/*
		 * if this is the last entry, don't bother doing accounting
		 */
		if (nxt == sort_head)
			break;

		this_rq_cost = dd->seek_cost;
		if (rq->sector == last_sec)
			this_rq_cost = (rq->nr_sectors + 255) >> 8;

		batch_count -= this_rq_cost;
		if (batch_count <= 0)
			break;

		last_sec = rq->sector + rq->nr_sectors;
		rq = list_entry_rq(nxt);
	} while (1);
}

/*
 * returns 0 if there are no expired reads on the fifo, 1 otherwise
 */
#define list_entry_fifo(ptr)	list_entry((ptr), struct deadline_rq, fifo)

static inline int deadline_check_fifo(struct deadline_data *dd)
{
	if (!list_empty(&dd->read_fifo)) {
		struct deadline_rq *drq = list_entry_fifo(dd->read_fifo.next);

		/*
		 * drq is expired!
		 */
		if (time_after(jiffies, drq->expires))
			return 1;
	}

	return 0;
}
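/*
 * Dispatch policy, implemented by deadline_next_request() below: an
 * expired read FIFO entry is served first, then reads in sector order.
 * Writes only run when reads have skipped past them writes_starved
 * times in a row (tracked in dd->starved), which bounds write
 * starvation even though writes have no deadline of their own.
 */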
static struct request *deadline_next_request(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq;
	struct list_head *nxt;
	struct request *rq;
	int writes;

	/*
	 * if there are still requests on the dispatch queue, just grab the
	 * first one
	 */
	if (!list_empty(&q->queue_head)) {
dispatch:
		rq = list_entry_rq(q->queue_head.next);
		dd->last_sector = rq->sector + rq->nr_sectors;
		return rq;
	}

	writes = !list_empty(&dd->sort_list[WRITE]);

	/*
	 * if we have expired entries on the fifo list, move some to dispatch
	 */
	if (deadline_check_fifo(dd)) {
		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		nxt = dd->read_fifo.next;
		drq = list_entry_fifo(nxt);
		deadline_move_requests(dd, drq->request);
		goto dispatch;
	}

	if (!list_empty(&dd->sort_list[READ])) {
		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		nxt = dd->sort_list[READ].next;
		deadline_move_requests(dd, list_entry_rq(nxt));
		goto dispatch;
	}

	/*
	 * either there are no reads expired or on sort list, or the reads
	 * have starved writes for too long. dispatch some writes
	 */
	if (writes) {
dispatch_writes:
		nxt = dd->sort_list[WRITE].next;
		deadline_move_requests(dd, list_entry_rq(nxt));
		dd->starved = 0;
		goto dispatch;
	}

	BUG_ON(!list_empty(&dd->sort_list[READ]));
	BUG_ON(writes);
	return NULL;
}

static void
deadline_add_request(request_queue_t *q, struct request *rq,
		     struct list_head *insert_here)
{
	struct deadline_data *dd = q->elevator.elevator_data;
	struct deadline_rq *drq = RQ_DATA(rq);
	const int data_dir = rq_data_dir(rq);

	/*
	 * flush hash on barrier insert, so as not to allow merges before a
	 * barrier.
	 */
	if (unlikely(rq->flags & REQ_BARRIER)) {
		DL_INVALIDATE_HASH(dd);
		q->last_merge = NULL;
	}

	/*
	 * add to sort list
	 */
	if (!insert_here)
		insert_here = dd->sort_list[data_dir].prev;

	list_add(&rq->queuelist, insert_here);

	if (unlikely(!(rq->flags & REQ_CMD)))
		return;

	if (rq_mergeable(rq)) {
		deadline_add_rq_hash(dd, drq);

		if (!q->last_merge)
			q->last_merge = &rq->queuelist;
	}

	if (data_dir == READ) {
		/*
		 * set expire time and add to fifo list
		 */
		drq->expires = jiffies + dd->read_expire;
		list_add_tail(&drq->fifo, &dd->read_fifo);
	}
}

static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
	struct deadline_rq *drq = RQ_DATA(rq);

	if (drq) {
		list_del_init(&drq->fifo);
		deadline_del_rq_hash(drq);
	}
}

static int deadline_queue_empty(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator.elevator_data;

	if (!list_empty(&dd->sort_list[WRITE]) ||
	    !list_empty(&dd->sort_list[READ]) ||
	    !list_empty(&q->queue_head))
		return 0;

	BUG_ON(!list_empty(&dd->read_fifo));
	return 1;
}

static struct list_head *
deadline_get_sort_head(request_queue_t *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator.elevator_data;

	return &dd->sort_list[rq_data_dir(rq)];
}

static void deadline_exit(request_queue_t *q, elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;
	struct deadline_rq *drq;
	struct request *rq;
	int i;

	BUG_ON(!list_empty(&dd->read_fifo));
	BUG_ON(!list_empty(&dd->sort_list[READ]));
	BUG_ON(!list_empty(&dd->sort_list[WRITE]));

	for (i = READ; i <= WRITE; i++) {
		struct request_list *rl = &q->rq[i];
		struct list_head *entry = &rl->free;

		if (list_empty(&rl->free))
			continue;

		while ((entry = entry->next) != &rl->free) {
			rq = list_entry_rq(entry);

			if ((drq = RQ_DATA(rq)) == NULL)
				continue;

			rq->elevator_private = NULL;
			kmem_cache_free(drq_pool, drq);
		}
	}

	kfree(dd->hash);
	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data), and alloc a drq for
 * each request on the free lists
 */
static int deadline_init(request_queue_t *q, elevator_t *e)
{
	struct deadline_data *dd;
	struct deadline_rq *drq;
	struct request *rq;
	int i, ret = 0;

	if (!drq_pool)
		return -ENOMEM;

	dd = kmalloc(sizeof(*dd), GFP_KERNEL);
	if (!dd)
		return -ENOMEM;
	memset(dd, 0, sizeof(*dd));

	dd->hash = kmalloc(sizeof(struct list_head) * DL_HASH_ENTRIES, GFP_KERNEL);
	if (!dd->hash) {
		kfree(dd);
		return -ENOMEM;
	}

	for (i = 0; i < DL_HASH_ENTRIES; i++)
		INIT_LIST_HEAD(&dd->hash[i]);

	INIT_LIST_HEAD(&dd->read_fifo);
	INIT_LIST_HEAD(&dd->sort_list[READ]);
	INIT_LIST_HEAD(&dd->sort_list[WRITE]);
	dd->dispatch = &q->queue_head;
	dd->fifo_batch = fifo_batch;
	dd->read_expire = read_expire;
	dd->seek_cost = seek_cost;
	dd->hash_valid_count = 1;
	dd->writes_starved = writes_starved;
	e->elevator_data = dd;

	for (i = READ; i <= WRITE; i++) {
		struct request_list *rl = &q->rq[i];
		struct list_head *entry = &rl->free;

		if (list_empty(&rl->free))
			continue;

		while ((entry = entry->next) != &rl->free) {
			rq = list_entry_rq(entry);

			drq = kmem_cache_alloc(drq_pool, GFP_KERNEL);
			if (!drq) {
				ret = -ENOMEM;
				break;
			}

			memset(drq, 0, sizeof(*drq));
			INIT_LIST_HEAD(&drq->fifo);
			INIT_LIST_HEAD(&drq->hash);
			drq->request = rq;
			rq->elevator_private = drq;
		}
	}

	if (ret)
		deadline_exit(q, e);

	return ret;
}

static int __init deadline_slab_setup(void)
{
	drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);

	if (!drq_pool)
		panic("deadline: can't init slab pool\n");

	return 0;
}

subsys_initcall(deadline_slab_setup);

elevator_t iosched_deadline = {
	.elevator_merge_fn =		deadline_merge,
	.elevator_merged_fn =		deadline_merged_request,
	.elevator_merge_req_fn =	deadline_merge_request,
	.elevator_next_req_fn =		deadline_next_request,
	.elevator_add_req_fn =		deadline_add_request,
	.elevator_remove_req_fn =	deadline_remove_request,
	.elevator_queue_empty_fn =	deadline_queue_empty,
	.elevator_get_sort_head_fn =	deadline_get_sort_head,
	.elevator_init_fn =		deadline_init,
	.elevator_exit_fn =		deadline_exit,
};

EXPORT_SYMBOL(iosched_deadline);