Commit 096a705b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.1/core' of git://git.kernel.dk/linux-block

* 'for-3.1/core' of git://git.kernel.dk/linux-block: (24 commits)
  block: strict rq_affinity
  backing-dev: use synchronize_rcu_expedited instead of synchronize_rcu
  block: fix patch import error in max_discard_sectors check
  block: reorder request_queue to remove 64 bit alignment padding
  CFQ: add think time check for group
  CFQ: add think time check for service tree
  CFQ: move think time check variables to a separate struct
  fixlet: Remove fs_excl from struct task.
  cfq: Remove special treatment for metadata rqs.
  block: document blk_plug list access
  block: avoid building too big plug list
  compat_ioctl: fix make headers_check regression
  block: eliminate potential for infinite loop in blkdev_issue_discard
  compat_ioctl: fix warning caused by qemu
  block: flush MEDIA_CHANGE from drivers on close(2)
  blk-throttle: Make total_nr_queued unsigned
  block: Add __attribute__((format(printf...) and fix fallout
  fs/partitions/check.c: make local symbols static
  block:remove some spare spaces in genhd.c
  block:fix the comment error in blkdev.h
  ...
parents fea80311 5757a6d7
......@@ -45,9 +45,13 @@ device.
rq_affinity (RW)
----------------
If this option is enabled, the block layer will migrate request completions
to the CPU that originally submitted the request. For some workloads
this provides a significant reduction in CPU cycles due to caching effects.
If this option is '1', the block layer will migrate request completions to the
cpu "group" that originally submitted the request. For some workloads this
provides a significant reduction in CPU cycles due to caching effects.
For storage configurations that need to maximize distribution of completion
processing setting this option to '2' forces the completion to run on the
requesting cpu (bypassing the "group" aggregation logic).
scheduler (RW)
--------------
......
......@@ -1282,10 +1282,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
init_request_from_bio(req, bio);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE)) {
req->cpu = blk_cpu_to_group(get_cpu());
put_cpu();
}
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = smp_processor_id();
plug = current->plug;
if (plug) {
......@@ -1305,7 +1303,10 @@ static int __make_request(struct request_queue *q, struct bio *bio)
plug->should_sort = 1;
}
list_add_tail(&req->queuelist, &plug->list);
plug->count++;
drive_stat_acct(req, 1);
if (plug->count >= BLK_MAX_REQUEST_COUNT)
blk_flush_plug_list(plug, false);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
......@@ -2629,6 +2630,7 @@ void blk_start_plug(struct blk_plug *plug)
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->cb_list);
plug->should_sort = 0;
plug->count = 0;
/*
* If this is a nested plug, don't actually assign it. It will be
......@@ -2712,6 +2714,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
return;
list_splice_init(&plug->list, &list);
plug->count = 0;
if (plug->should_sort) {
list_sort(NULL, &list, plug_rq_cmp);
......
......@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret;
struct io_context *ioc;
ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
if (ret) {
atomic_long_set(&ret->refcount, 1);
atomic_set(&ret->nr_tasks, 1);
spin_lock_init(&ret->lock);
ret->ioprio_changed = 0;
ret->ioprio = 0;
ret->last_waited = 0; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */
INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ret->cic_list);
ret->ioc_data = NULL;
ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
if (ioc) {
atomic_long_set(&ioc->refcount, 1);
atomic_set(&ioc->nr_tasks, 1);
spin_lock_init(&ioc->lock);
ioc->ioprio_changed = 0;
ioc->ioprio = 0;
ioc->last_waited = 0; /* doesn't matter... */
ioc->nr_batch_requests = 0; /* because this is 0 */
INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ioc->cic_list);
ioc->ioc_data = NULL;
#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
ret->cgroup_changed = 0;
ioc->cgroup_changed = 0;
#endif
}
return ret;
return ioc;
}
/*
......@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
*/
struct io_context *get_io_context(gfp_t gfp_flags, int node)
{
struct io_context *ret = NULL;
struct io_context *ioc = NULL;
/*
* Check for unlikely race with exiting task. ioc ref count is
* zero when ioc is being detached.
*/
do {
ret = current_io_context(gfp_flags, node);
if (unlikely(!ret))
ioc = current_io_context(gfp_flags, node);
if (unlikely(!ioc))
break;
} while (!atomic_long_inc_not_zero(&ret->refcount));
} while (!atomic_long_inc_not_zero(&ioc->refcount));
return ret;
return ioc;
}
EXPORT_SYMBOL(get_io_context);
......
......@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* granularity
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
if (q->limits.discard_granularity) {
if (unlikely(!max_discard_sectors)) {
/* Avoid infinite loop below. Being cautious never hurts. */
return -EOPNOTSUPP;
} else if (q->limits.discard_granularity) {
unsigned int disc_sects = q->limits.discard_granularity >> 9;
max_discard_sectors &= ~(disc_sects - 1);
......
......@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
void __blk_complete_request(struct request *req)
{
int ccpu, cpu, group_cpu = NR_CPUS;
struct request_queue *q = req->q;
unsigned long flags;
int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
ccpu = req->cpu;
else
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
ccpu = blk_cpu_to_group(ccpu);
group_cpu = blk_cpu_to_group(cpu);
}
} else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
......
......@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
return queue_var_show(set, page);
return queue_var_show(set << force, page);
}
static ssize_t
......@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
if (val)
if (val) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
else
if (val == 2)
queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
} else {
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
spin_unlock_irq(q->queue_lock);
#endif
return ret;
......
......@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
return NULL;
}
static inline int total_nr_queued(struct throtl_data *td)
static inline unsigned int total_nr_queued(struct throtl_data *td)
{
return (td->nr_queued[0] + td->nr_queued[1]);
return td->nr_queued[0] + td->nr_queued[1];
}
static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
......@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
bio_list_init(&bio_list_on_stack);
throtl_log(td, "dispatch nr_queued=%d read=%u write=%u",
throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
total_nr_queued(td), td->nr_queued[READ],
td->nr_queued[WRITE]);
......@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
struct delayed_work *dwork = &td->throtl_work;
/* schedule work if limits changed even if no bio is queued */
if (total_nr_queued(td) > 0 || td->limits_changed) {
if (total_nr_queued(td) || td->limits_changed) {
/*
* We might have a work scheduled to be executed in future.
* Cancel that and schedule a new one.
......
......@@ -87,9 +87,10 @@ struct cfq_rb_root {
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
struct cfq_ttime ttime;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_vdisktime = 0, }
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
.ttime = {.last_end_request = jiffies,},}
/*
* Per process-grouping structure
......@@ -129,14 +130,12 @@ struct cfq_queue {
unsigned long slice_end;
long slice_resid;
/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
/* io prio of this group */
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
unsigned short ioprio_class;
pid_t pid;
......@@ -212,6 +211,7 @@ struct cfq_group {
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
struct cfq_ttime ttime;
};
/*
......@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
j++, st = i < IDLE_WORKLOAD ? \
&cfqg->service_trees[i][j]: NULL) \
static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
struct cfq_ttime *ttime, bool group_idle)
{
unsigned long slice;
if (!sample_valid(ttime->ttime_samples))
return false;
if (group_idle)
slice = cfqd->cfq_group_idle;
else
slice = cfqd->cfq_slice_idle;
return ttime->ttime_mean > slice;
}
static inline bool iops_mode(struct cfq_data *cfqd)
{
......@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
if (rq_is_sync(rq1) != rq_is_sync(rq2))
return rq_is_sync(rq1) ? rq1 : rq2;
if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
return rq1->cmd_flags & REQ_META ? rq1 : rq2;
s1 = blk_rq_pos(rq1);
s2 = blk_rq_pos(rq2);
......@@ -1005,7 +1014,7 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
return NULL;
}
void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
unsigned int weight)
{
struct cfq_group *cfqg = cfqg_of_blkg(blkg);
......@@ -1059,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
cfqg->ttime.last_end_request = jiffies;
/*
* Take the initial reference that will be released on destroy
* This can be thought of a joint reference by cgroup and
......@@ -1235,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
* it should not be NULL as even if elevator was exiting, cgroup deltion
* path got to it first.
*/
void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
{
unsigned long flags;
struct cfq_data *cfqd = key;
......@@ -1502,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
struct request *__alias, *prev;
struct request *prev;
cfqq->queued[rq_is_sync(rq)]++;
/*
* looks a little odd, but the first insert might return an alias.
* if that happens, put the alias on the dispatch list
*/
while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
cfq_dispatch_insert(cfqd->queue, __alias);
elv_rb_add(&cfqq->sort_list, rq);
if (!cfq_cfqq_on_rr(cfqq))
cfq_add_cfqq_rr(cfqd, cfqq);
......@@ -1598,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
cfqq->cfqd->rq_queued--;
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(rq), rq_is_sync(rq));
if (rq->cmd_flags & REQ_META) {
WARN_ON(!cfqq->meta_pending);
cfqq->meta_pending--;
}
}
static int cfq_merge(struct request_queue *q, struct request **req,
......@@ -1969,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones
* in their service tree.
*/
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
!cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
......@@ -2022,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* slice, then don't idle. This avoids overrunning the allotted
* time slice.
*/
if (sample_valid(cic->ttime_samples) &&
(cfqq->slice_end - jiffies < cic->ttime_mean)) {
if (sample_valid(cic->ttime.ttime_samples) &&
(cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
cic->ttime_mean);
cic->ttime.ttime_mean);
return;
}
......@@ -2381,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
* this group, wait for requests to complete.
*/
check_group_idle:
if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
&& cfqq->cfqg->dispatched) {
if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
cfqq->cfqg->dispatched &&
!cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
cfqq = NULL;
goto keep_queue;
}
......@@ -2833,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
cfqd->queue->node);
if (cic) {
cic->last_end_request = jiffies;
cic->ttime.last_end_request = jiffies;
INIT_LIST_HEAD(&cic->queue_list);
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
......@@ -2883,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
* elevate the priority of this queue
*/
cfqq->org_ioprio = cfqq->ioprio;
cfqq->org_ioprio_class = cfqq->ioprio_class;
cfq_clear_cfqq_prio_changed(cfqq);
}
......@@ -3221,14 +3224,28 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
}
static void
cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
{
unsigned long elapsed = jiffies - cic->last_end_request;
unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
unsigned long elapsed = jiffies - ttime->last_end_request;
elapsed = min(elapsed, 2UL * slice_idle);
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
}
cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
static void
cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic)
{
if (cfq_cfqq_sync(cfqq)) {
__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
__cfq_update_io_thinktime(&cfqq->service_tree->ttime,
cfqd->cfq_slice_idle);
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
__cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
#endif
}
static void
......@@ -3277,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
if (cic->ttime_mean > cfqd->cfq_slice_idle)
else if (sample_valid(cic->ttime.ttime_samples)) {
if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
enable_idle = 0;
else
enable_idle = 1;
......@@ -3339,13 +3356,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
RB_EMPTY_ROOT(&cfqq->sort_list))
return true;
/*
* So both queues are sync. Let the new request get disk time if
* it's a metadata request and the current queue is doing regular IO.
*/
if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
return true;
/*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
......@@ -3410,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
if (rq->cmd_flags & REQ_META)
cfqq->meta_pending++;
cfq_update_io_thinktime(cfqd, cic);
cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);
cfq_update_idle_window(cfqd, cfqq, cic);
......@@ -3520,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
if (cfqq->cfqg->nr_cfqq > 1)
return false;
/* the only queue in the group, but think time is big */
if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
return false;
if (cfq_slice_used(cfqq))
return true;
/* if slice left is less than think time, wait busy */
if (cic && sample_valid(cic->ttime_samples)
&& (cfqq->slice_end - jiffies < cic->ttime_mean))
if (cic && sample_valid(cic->ttime.ttime_samples)
&& (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
return true;
/*
......@@ -3566,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
if (sync) {
RQ_CIC(rq)->last_end_request = now;
struct cfq_rb_root *service_tree;
RQ_CIC(rq)->ttime.last_end_request = now;
if (cfq_cfqq_on_rr(cfqq))
service_tree = cfqq->service_tree;
else
service_tree = service_tree_for(cfqq->cfqg,
cfqq_prio(cfqq), cfqq_type(cfqq));
service_tree->ttime.last_end_request = now;
if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
cfqd->last_delayed_sync = now;
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
cfqq->cfqg->ttime.last_end_request = now;
#endif
/*
* If this is the active queue, check if it needs to be expired,
* or if we want to idle in case it has no pending requests.
......@@ -3616,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_schedule_dispatch(cfqd);
}
/*
* we temporarily boost lower priority queues if they are holding fs exclusive
* resources. they are boosted to normal prio (CLASS_BE/4)
*/
static void cfq_prio_boost(struct cfq_queue *cfqq)
{
if (has_fs_excl()) {
/*
* boost idle prio on transactions that would lock out other
* users of the filesystem
*/
if (cfq_class_idle(cfqq))
cfqq->ioprio_class = IOPRIO_CLASS_BE;
if (cfqq->ioprio > IOPRIO_NORM)
cfqq->ioprio = IOPRIO_NORM;
} else {
/*
* unboost the queue (if needed)
*/
cfqq->ioprio_class = cfqq->org_ioprio_class;
cfqq->ioprio = cfqq->org_ioprio;
}
}
static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
......@@ -3670,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
cfq_init_prio_data(cfqq, cic->ioc);
cfq_prio_boost(cfqq);
return __cfq_may_queue(cfqq);
}
......
......@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
#define BLKBSZSET_32 _IOW(0x12, 113, int)
#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
struct compat_floppy_struct {
compat_uint_t size;
compat_uint_t sect;
compat_uint_t head;
compat_uint_t track;
compat_uint_t stretch;
unsigned char gap;
unsigned char rate;
unsigned char spec1;
unsigned char fmt_gap;
const compat_caddr_t name;
};
struct compat_floppy_drive_params {
char cmos;
compat_ulong_t max_dtr;
......@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
......
......@@ -77,10 +77,8 @@ static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
struct rb_root *root = deadline_rb_root(dd, rq);
struct request *__alias;
while (unlikely(__alias = elv_rb_add(root, rq)))
deadline_move_request(dd, __alias);
elv_rb_add(root, rq);
}
static inline void
......
......@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
* RB-tree support functions for inserting/lookup/removal of requests
* in a sorted RB tree.
*/
struct request *elv_rb_add(struct rb_root *root, struct request *rq)
void elv_rb_add(struct rb_root *root, struct request *rq)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
......@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
if (blk_rq_pos(rq) < blk_rq_pos(__rq))
p = &(*p)->rb_left;
else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
p = &(*p)->rb_right;
else
return __rq;
}
rb_link_node(&rq->rb_node, parent, p);
rb_insert_color(&rq->rb_node, root);
return NULL;
}
EXPORT_SYMBOL(elv_rb_add);
......
......@@ -1492,30 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
}
/**
* disk_check_events - schedule immediate event checking
* @disk: disk to check events for
* disk_flush_events - schedule immediate event checking and flushing
* @disk: disk to check and flush events for
* @mask: events to flush
*
* Schedule immediate event checking on @disk if not blocked.
* Schedule immediate event checking on @disk if not blocked. Events in
* @mask are scheduled to be cleared from the driver. Note that this
* doesn't clear the events from @disk->ev.
*
* CONTEXT:
* Don't care. Safe to call from irq context.
* If @mask is non-zero must be called with bdev->bd_mutex held.
*/
void disk_check_events(struct gendisk *disk)
void disk_flush_events(struct gendisk *disk, unsigned int mask)
{
struct disk_events *ev = disk->ev;
unsigned long flags;
if (!ev)
return;
spin_lock_irqsave(&ev->lock, flags);
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
if (!ev->block) {
cancel_delayed_work(&ev->dwork);
queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
}
spin_unlock_irqrestore(&ev->lock, flags);
spin_unlock_irq(&ev->lock);
}
EXPORT_SYMBOL_GPL(disk_check_events);
/**
* disk_clear_events - synchronously check, clear and return pending events
......@@ -1705,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
mutex_lock(&disk_events_mutex);
list_for_each_entry(ev, &disk_events, node)
disk_check_events(ev->disk);
disk_flush_events(ev->disk, 0);
mutex_unlock(&disk_events_mutex);
......
......@@ -1448,6 +1448,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) {
bool bdev_free;
......@@ -1456,7 +1458,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
* are protected with bdev_lock. bd_mutex is to
* synchronize disk_holder unlinking.
*/
mutex_lock(&bdev->bd_mutex);
spin_lock(&bdev_lock);
WARN_ON_ONCE(--bdev->bd_holders < 0);
......@@ -1474,16 +1475,20 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
* If this was the last claim, remove holder link and
* unblock evpoll if it was a write holder.
*/
if (bdev_free) {
if (bdev->bd_write_holder) {
if (bdev_free && bdev->bd_write_holder) {
disk_unblock_events(bdev->bd_disk);
disk_check_events(bdev->bd_disk);
bdev->bd_write_holder = false;
}
}
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
* event. This is to ensure detection of media removal commanded
* from userland - e.g. eject(1).
*/
disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex);
}
return __blkdev_put(bdev, mode, 0);
}
......
......@@ -68,6 +68,8 @@
#ifdef CONFIG_BLOCK
#include <linux/loop.h>
#include <linux/cdrom.h>
#include <linux/fd.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/sg.h>
......@@ -944,6 +946,9 @@ COMPATIBLE_IOCTL(FIOQSIZE)
IGNORE_IOCTL(LOOP_CLR_FD)
/* md calls this on random blockdevs */
IGNORE_IOCTL(RAID_VERSION)
/* qemu/qemu-img might call these two on plain files for probing */
IGNORE_IOCTL(CDROM_DRIVE_STATUS)
IGNORE_IOCTL(FDGETPRM32)
/* SG stuff */
COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
......
......@@ -237,21 +237,21 @@ ssize_t part_size_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
ssize_t part_ro_show(struct device *dev,
static ssize_t part_ro_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%d\n", p->policy ? 1 : 0);
}
ssize_t part_alignment_offset_show(struct device *dev,
static ssize_t part_alignment_offset_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}
ssize_t part_discard_alignment_show(struct device *dev,
static ssize_t part_discard_alignment_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
......
......@@ -678,23 +678,19 @@ struct buffer_chunk {
static void write_chunk(struct buffer_chunk *chunk)
{
int i;
get_fs_excl();
for (i = 0; i < chunk->nr; i++) {
submit_logged_buffer(chunk->bh[i]);
}
chunk->nr = 0;
put_fs_excl();
}
static void write_ordered_chunk(struct buffer_chunk *chunk)
{
int i;
get_fs_excl();
for (i = 0; i < chunk->nr; i++) {
submit_ordered_buffer(chunk->bh[i]);
}
chunk->nr = 0;
put_fs_excl();
}
static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
......@@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
return 0;
}
get_fs_excl();
/* before we can put our commit blocks on disk, we have to make sure everyone older than
** us is on disk too
*/
......@@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
if (retval)
reiserfs_abort(s, retval, "Journal write error in %s",
__func__);
put_fs_excl();
return retval;
}
......@@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
return 0;
}
get_fs_excl();
/* if all the work is already done, get out of here */
if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
atomic_read(&(jl->j_commit_left)) <= 0) {
......@@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
put_journal_list(s, jl);
if (flushall)
mutex_unlock(&journal->j_flush_mutex);
put_fs_excl();
return err;
}
......@@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
th->t_trans_id = journal->j_trans_id;
unlock_journal(sb);
INIT_LIST_HEAD(&th->t_list);
get_fs_excl();
return 0;
out_fail:
......@@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
flush = flags & FLUSH_ALL;
wait_on_commit = flags & WAIT;
put_fs_excl();
current->journal_info = th->t_handle_save;
reiserfs_check_lock_depth(sb, "journal end");
if (journal->j_len == 0) {
......@@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
dump_stack();
#endif
}
......@@ -351,13 +351,11 @@ bool grab_super_passive(struct super_block *sb)
*/
void lock_super(struct super_block * sb)
{
get_fs_excl();
mutex_lock(&sb->s_lock);
}
void unlock_super(struct super_block * sb)
{
put_fs_excl();
mutex_unlock(&sb->s_lock);
}
......@@ -385,7 +383,6 @@ void generic_shutdown_super(struct super_block *sb)
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;
fsnotify_unmount_inodes(&sb->s_inodes);
......@@ -400,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb)
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
}
put_fs_excl();
}
spin_lock(&sb_lock);
/* should be initialized for __put_super_and_need_restart() */
......
......@@ -73,7 +73,7 @@ enum rq_cmd_type_bits {
/*
* try to put the fields that are referenced together in the same cacheline.
* if you modify this structure, be sure to check block/blk-core.c:rq_init()
* if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
* as well!
*/
struct request {
......@@ -260,8 +260,7 @@ struct queue_limits {
unsigned char discard_zeroes_data;
};
struct request_queue
{
struct request_queue {
/*
* Together with queue_head for cacheline sharing
*/
......@@ -304,14 +303,14 @@ struct request_queue
void *queuedata;
/*
* queue needs bounce pages for pages above this limit
* various queue flags, see QUEUE_* below
*/
gfp_t bounce_gfp;
unsigned long queue_flags;
/*
* various queue flags, see QUEUE_* below
* queue needs bounce pages for pages above this limit
*/
unsigned long queue_flags;
gfp_t bounce_gfp;
/*
* protects queue structures from reentrancy. ->__queue_lock should
......@@ -334,8 +333,8 @@ struct request_queue
unsigned int nr_congestion_off;
unsigned int nr_batching;
void *dma_drain_buffer;
unsigned int dma_drain_size;
void *dma_drain_buffer;
unsigned int dma_pad_mask;
unsigned int dma_alignment;
......@@ -393,7 +392,7 @@ struct request_queue
#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */
#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
......@@ -403,6 +402,7 @@ struct request_queue
#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
......@@ -857,12 +857,21 @@ struct request_queue *blk_alloc_queue(gfp_t);
struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
/*
* Note: Code in between changing the blk_plug list/cb_list or element of such
* lists is preemptable, but such code can't do sleep (or be very careful),
* otherwise data is corrupted. For details, please check schedule() where
* blk_schedule_flush_plug() is called.
*/
struct blk_plug {
unsigned long magic;
struct list_head list;
struct list_head cb_list;
unsigned int should_sort;
unsigned int count;
};
#define BLK_MAX_REQUEST_COUNT 16
struct blk_plug_cb {
struct list_head list;
void (*callback)(struct blk_plug_cb *);
......
......@@ -146,7 +146,7 @@ extern struct request *elv_rb_latter_request(struct request_queue *, struct requ
/*
* rb support functions.
*/
extern struct request *elv_rb_add(struct rb_root *, struct request *);
extern void elv_rb_add(struct rb_root *, struct request *);
extern void elv_rb_del(struct rb_root *, struct request *);
extern struct request *elv_rb_find(struct rb_root *, sector_t);
......
......@@ -377,4 +377,26 @@ struct floppy_raw_cmd {
#define FDEJECT _IO(2, 0x5a)
/* eject the disk */
#ifdef __KERNEL__
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
struct compat_floppy_struct {
compat_uint_t size;
compat_uint_t sect;
compat_uint_t head;
compat_uint_t track;
compat_uint_t stretch;
unsigned char gap;
unsigned char rate;
unsigned char spec1;
unsigned char fmt_gap;
const compat_caddr_t name;
};
#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
#endif
#endif
#endif
......@@ -1469,10 +1469,6 @@ enum {
#define vfs_check_frozen(sb, level) \
wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
#define get_fs_excl() atomic_inc(&current->fs_excl)
#define put_fs_excl() atomic_dec(&current->fs_excl)
#define has_fs_excl() atomic_read(&current->fs_excl)
/*
* until VFS tracks user namespaces for inodes, just make all files
* belong to init_user_ns
......
......@@ -420,7 +420,7 @@ static inline int get_disk_ro(struct gendisk *disk)
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_check_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
/* drivers/char/random.c */
......
......@@ -176,7 +176,6 @@ extern struct cred init_cred;
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.fs_excl = ATOMIC_INIT(0), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
......
......@@ -5,6 +5,14 @@
#include <linux/rcupdate.h>
struct cfq_queue;
struct cfq_ttime {
unsigned long last_end_request;
unsigned long ttime_total;
unsigned long ttime_samples;
unsigned long ttime_mean;
};
struct cfq_io_context {
void *key;
......@@ -12,11 +20,7 @@ struct cfq_io_context {
struct io_context *ioc;
unsigned long last_end_request;
unsigned long ttime_total;
unsigned long ttime_samples;
unsigned long ttime_mean;
struct cfq_ttime ttime;
struct list_head queue_list;
struct hlist_node cic_list;
......
......@@ -1512,7 +1512,6 @@ struct task_struct {
short il_next;
short pref_node_fork;
#endif
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
/*
......
......@@ -898,7 +898,6 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
......
......@@ -290,7 +290,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
/* One for us, one for whoever does the "release_task()" (usually parent) */
atomic_set(&tsk->usage,2);
atomic_set(&tsk->fs_excl, 0);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
......
......@@ -505,7 +505,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
list_del_rcu(&bdi->bdi_list);
spin_unlock_bh(&bdi_lock);
synchronize_rcu();
synchronize_rcu_expedited();
}
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment