Commit 00c8e791 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] self-unplugging request queues

The patch teaches a queue to unplug itself:

a) if it has four requests OR
b) if it has had plugged requests for 3 milliseconds.

These numbers may need to be tuned, although doing so doesn't seem to
make much difference.  10 msecs works OK, so HZ=100 machines will be
fine.

Instrumentation shows that about 5-10% of requests were started due to
the three millisecond timeout (during a kernel compile).  That's
somewhat significant.  It means that the kernel is leaving stuff in the
queue, plugged, for too long.  This testing was with a uniprocessor
preemptible kernel, which is particularly vulnerable to unplug latency
(submit some IO, get preempted before the unplug).

This patch permits the removal of a lot of rather lame unplugging in
page reclaim and in the writeback code, which kicks the queues
(globally!) every four megabytes to get writeback underway.

This patch doesn't use blk_run_queues().  It is able to kick just the
particular queue.

The patch is not expected to make much difference really, except for
AIO.  AIO needs a blk_run_queues() in its io_submit() call.  For each
request.  This means that AIO has to disable plugging altogether,
unless something like this patch does it for it.  It means that AIO
will unplug *all* queues in the machine for every io_submit().  Even
against a socket!

This patch was tested by disabling blk_run_queues() completely.  The
system ran OK.

The 3 milliseconds may be too long.  It's OK for the heavy writeback
code, but AIO may want less.  Or maybe AIO really wants zero (ie:
disable plugging).  If that is so, we need new code paths by which AIO
can communicate the "immediate unplug" information - a global unplug is
not good.


To minimise unplug latency due to user CPU load, this patch gives keventd
`nice -10'.  This is of course completely arbitrary.  Really, I think keventd
should be SCHED_RR/MAX_RT_PRIO-1, as it has been in -aa kernels for ages.
parent c5070032
......@@ -27,6 +27,8 @@
#include <linux/completion.h>
#include <linux/slab.h>
static void blk_unplug_work(void *data);
/*
* For the allocated request tables
*/
......@@ -237,6 +239,14 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
blk_queue_hardsect_size(q, 512);
blk_queue_dma_alignment(q, 511);
q->unplug_thresh = 4; /* hmm */
q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
if (q->unplug_delay == 0)
q->unplug_delay = 1;
init_timer(&q->unplug_timer);
INIT_WORK(&q->unplug_work, blk_unplug_work, q);
/*
* by default assume old behaviour and bounce for any highmem page
*/
......@@ -960,6 +970,7 @@ void blk_plug_device(request_queue_t *q)
if (!blk_queue_plugged(q)) {
spin_lock(&blk_plug_lock);
list_add_tail(&q->plug_list, &blk_plug_list);
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
spin_unlock(&blk_plug_lock);
}
}
......@@ -974,6 +985,7 @@ int blk_remove_plug(request_queue_t *q)
if (blk_queue_plugged(q)) {
spin_lock(&blk_plug_lock);
list_del_init(&q->plug_list);
del_timer(&q->unplug_timer);
spin_unlock(&blk_plug_lock);
return 1;
}
......@@ -992,6 +1004,8 @@ static inline void __generic_unplug_device(request_queue_t *q)
if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
return;
del_timer(&q->unplug_timer);
/*
* was plugged, fire request_fn if queue has stuff to do
*/
......@@ -1020,6 +1034,18 @@ void generic_unplug_device(void *data)
spin_unlock_irq(q->queue_lock);
}
/*
 * Work-queue handler for the deferred unplug.  Runs in keventd
 * (process) context; 'data' is the request_queue_t pointer supplied
 * at INIT_WORK() time, so this simply unplugs that one queue.
 */
static void blk_unplug_work(void *data)
{
generic_unplug_device(data);
}
/*
 * Unplug timer expiry: the queue has stayed plugged for unplug_delay
 * jiffies.  Hand the actual unplug off to process context via the
 * queue's work item rather than doing it here, since the unplug path
 * takes the queue lock with spin_lock_irq(), which is not suitable
 * for timer context.
 */
static void blk_unplug_timeout(unsigned long data)
{
	schedule_work(&((request_queue_t *)data)->unplug_work);
}
/**
* blk_start_queue - restart a previously stopped queue
* @q: The &request_queue_t in question
......@@ -1164,6 +1190,9 @@ void blk_cleanup_queue(request_queue_t * q)
count -= __blk_cleanup_queue(&q->rq[READ]);
count -= __blk_cleanup_queue(&q->rq[WRITE]);
del_timer_sync(&q->unplug_timer);
flush_scheduled_work();
if (count)
printk("blk_cleanup_queue: leaked requests (%d)\n", count);
......@@ -1269,6 +1298,9 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
blk_queue_make_request(q, __make_request);
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
q->unplug_timer.function = blk_unplug_timeout;
q->unplug_timer.data = (unsigned long)q;
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
......@@ -1811,7 +1843,15 @@ static int __make_request(request_queue_t *q, struct bio *bio)
out:
if (freereq)
__blk_put_request(q, freereq);
if (blk_queue_plugged(q)) {
int nr_queued = (queue_nr_requests - q->rq[0].count) +
(queue_nr_requests - q->rq[1].count);
if (nr_queued == q->unplug_thresh)
__generic_unplug_device(q);
}
spin_unlock_irq(q->queue_lock);
return 0;
end_io:
......
......@@ -4,6 +4,8 @@
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
......@@ -188,6 +190,14 @@ struct request_queue
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
/*
* Auto-unplugging state
*/
struct timer_list unplug_timer;
int unplug_thresh; /* After this many requests */
unsigned long unplug_delay; /* After this many jiffies */
struct work_struct unplug_work;
struct backing_dev_info backing_dev_info;
/*
......
......@@ -177,6 +177,7 @@ static int worker_thread(void *__startup)
current->flags |= PF_IOTHREAD;
cwq->thread = current;
set_user_nice(current, -10);
set_cpus_allowed(current, 1UL << cpu);
spin_lock_irq(&current->sig->siglock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment