Commit b4412323 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  cfq-iosched: fix RCU problem in cfq_cic_lookup()
  block: make blktrace use per-cpu buffers for message notes
  Added in elevator switch message to blktrace stream
  Added in MESSAGE notes for blktraces
  block: reorder cfq_queue to save space on 64bit builds
  block: Move the second call to get_request to the end of the loop
  splice: handle try_to_release_page() failure
  splice: fix sendfile() issue with relay
parents dc1d60a0 d6de8be7
...@@ -806,35 +806,32 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, ...@@ -806,35 +806,32 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
rq = get_request(q, rw_flags, bio, GFP_NOIO); rq = get_request(q, rw_flags, bio, GFP_NOIO);
while (!rq) { while (!rq) {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
struct io_context *ioc;
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
prepare_to_wait_exclusive(&rl->wait[rw], &wait, prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
rq = get_request(q, rw_flags, bio, GFP_NOIO); blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
if (!rq) {
struct io_context *ioc;
blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); __generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
__generic_unplug_device(q); io_schedule();
spin_unlock_irq(q->queue_lock);
io_schedule();
/* /*
* After sleeping, we become a "batching" process and * After sleeping, we become a "batching" process and
* will be able to allocate at least one request, and * will be able to allocate at least one request, and
* up to a big batch of them for a small period time. * up to a big batch of them for a small period time.
* See ioc_batching, ioc_set_batching * See ioc_batching, ioc_set_batching
*/ */
ioc = current_io_context(GFP_NOIO, q->node); ioc = current_io_context(GFP_NOIO, q->node);
ioc_set_batching(q, ioc); ioc_set_batching(q, ioc);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
}
finish_wait(&rl->wait[rw], &wait); finish_wait(&rl->wait[rw], &wait);
}
rq = get_request(q, rw_flags, bio, GFP_NOIO);
};
return rq; return rq;
} }
......
...@@ -75,6 +75,23 @@ static void trace_note_time(struct blk_trace *bt) ...@@ -75,6 +75,23 @@ static void trace_note_time(struct blk_trace *bt)
local_irq_restore(flags); local_irq_restore(flags);
} }
/*
 * Format a printf-style message into this trace's per-cpu message buffer
 * and emit it on the blktrace stream as a BLK_TN_MESSAGE note.
 * Output is truncated to BLK_TN_MAX_MSG bytes by vscnprintf.
 * Callers normally go through the blk_add_trace_msg() wrapper, which
 * checks that tracing is enabled before calling here.
 */
void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
{
int n;
va_list args;
char *buf;
/*
 * Disable preemption so smp_processor_id() is stable and the per-cpu
 * buffer cannot be re-entered by a task migrated onto this cpu while
 * we are still formatting into it.
 */
preempt_disable();
buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
va_start(args, fmt);
/* vscnprintf returns the number of bytes actually written (excl. NUL) */
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
preempt_enable();
}
EXPORT_SYMBOL_GPL(__trace_note_message);
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
pid_t pid) pid_t pid)
{ {
...@@ -232,6 +249,7 @@ static void blk_trace_cleanup(struct blk_trace *bt) ...@@ -232,6 +249,7 @@ static void blk_trace_cleanup(struct blk_trace *bt)
debugfs_remove(bt->dropped_file); debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir); blk_remove_tree(bt->dir);
free_percpu(bt->sequence); free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt); kfree(bt);
} }
...@@ -346,6 +364,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ...@@ -346,6 +364,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->sequence) if (!bt->sequence)
goto err; goto err;
bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
if (!bt->msg_data)
goto err;
ret = -ENOENT; ret = -ENOENT;
dir = blk_create_tree(buts->name); dir = blk_create_tree(buts->name);
if (!dir) if (!dir)
...@@ -392,6 +414,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ...@@ -392,6 +414,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (bt->dropped_file) if (bt->dropped_file)
debugfs_remove(bt->dropped_file); debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence); free_percpu(bt->sequence);
free_percpu(bt->msg_data);
if (bt->rchan) if (bt->rchan)
relay_close(bt->rchan); relay_close(bt->rchan);
kfree(bt); kfree(bt);
......
...@@ -124,6 +124,8 @@ struct cfq_data { ...@@ -124,6 +124,8 @@ struct cfq_data {
struct cfq_queue { struct cfq_queue {
/* reference count */ /* reference count */
atomic_t ref; atomic_t ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */ /* parent cfq_data */
struct cfq_data *cfqd; struct cfq_data *cfqd;
/* service_tree member */ /* service_tree member */
...@@ -138,14 +140,14 @@ struct cfq_queue { ...@@ -138,14 +140,14 @@ struct cfq_queue {
int queued[2]; int queued[2];
/* currently allocated requests */ /* currently allocated requests */
int allocated[2]; int allocated[2];
/* pending metadata requests */
int meta_pending;
/* fifo list of requests in sort_list */ /* fifo list of requests in sort_list */
struct list_head fifo; struct list_head fifo;
unsigned long slice_end; unsigned long slice_end;
long slice_resid; long slice_resid;
/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */ /* number of requests that are on the dispatch list or inside driver */
int dispatched; int dispatched;
...@@ -153,8 +155,6 @@ struct cfq_queue { ...@@ -153,8 +155,6 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio; unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class; unsigned short ioprio_class, org_ioprio_class;
/* various state flags, see below */
unsigned int flags;
}; };
enum cfqq_state_flags { enum cfqq_state_flags {
...@@ -1142,6 +1142,9 @@ static void cfq_put_queue(struct cfq_queue *cfqq) ...@@ -1142,6 +1142,9 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
kmem_cache_free(cfq_pool, cfqq); kmem_cache_free(cfq_pool, cfqq);
} }
/*
* Must always be called with the rcu_read_lock() held
*/
static void static void
__call_for_each_cic(struct io_context *ioc, __call_for_each_cic(struct io_context *ioc,
void (*func)(struct io_context *, struct cfq_io_context *)) void (*func)(struct io_context *, struct cfq_io_context *))
...@@ -1197,6 +1200,11 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) ...@@ -1197,6 +1200,11 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
cfq_cic_free(cic); cfq_cic_free(cic);
} }
/*
* Must be called with rcu_read_lock() held or preemption otherwise disabled.
* Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
* and ->trim() which is called with the task lock held
*/
static void cfq_free_io_context(struct io_context *ioc) static void cfq_free_io_context(struct io_context *ioc)
{ {
/* /*
...@@ -1502,20 +1510,24 @@ static struct cfq_io_context * ...@@ -1502,20 +1510,24 @@ static struct cfq_io_context *
cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
{ {
struct cfq_io_context *cic; struct cfq_io_context *cic;
unsigned long flags;
void *k; void *k;
if (unlikely(!ioc)) if (unlikely(!ioc))
return NULL; return NULL;
rcu_read_lock();
/* /*
* we maintain a last-hit cache, to avoid browsing over the tree * we maintain a last-hit cache, to avoid browsing over the tree
*/ */
cic = rcu_dereference(ioc->ioc_data); cic = rcu_dereference(ioc->ioc_data);
if (cic && cic->key == cfqd) if (cic && cic->key == cfqd) {
rcu_read_unlock();
return cic; return cic;
}
do { do {
rcu_read_lock();
cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
rcu_read_unlock(); rcu_read_unlock();
if (!cic) if (!cic)
...@@ -1524,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) ...@@ -1524,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
k = cic->key; k = cic->key;
if (unlikely(!k)) { if (unlikely(!k)) {
cfq_drop_dead_cic(cfqd, ioc, cic); cfq_drop_dead_cic(cfqd, ioc, cic);
rcu_read_lock();
continue; continue;
} }
spin_lock_irqsave(&ioc->lock, flags);
rcu_assign_pointer(ioc->ioc_data, cic); rcu_assign_pointer(ioc->ioc_data, cic);
spin_unlock_irqrestore(&ioc->lock, flags);
break; break;
} while (1); } while (1);
...@@ -2134,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q) ...@@ -2134,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q)
static void cfq_slab_kill(void) static void cfq_slab_kill(void)
{ {
/*
* Caller already ensured that pending RCU callbacks are completed,
* so we should have no busy allocations at this point.
*/
if (cfq_pool) if (cfq_pool)
kmem_cache_destroy(cfq_pool); kmem_cache_destroy(cfq_pool);
if (cfq_ioc_pool) if (cfq_ioc_pool)
...@@ -2292,6 +2311,11 @@ static void __exit cfq_exit(void) ...@@ -2292,6 +2311,11 @@ static void __exit cfq_exit(void)
ioc_gone = &all_gone; ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */ /* ioc_gone's update must be visible before reading ioc_count */
smp_wmb(); smp_wmb();
/*
* this also protects us from entering cfq_slab_kill() with
* pending RCU callbacks
*/
if (elv_ioc_count_read(ioc_count)) if (elv_ioc_count_read(ioc_count))
wait_for_completion(ioc_gone); wait_for_completion(ioc_gone);
cfq_slab_kill(); cfq_slab_kill();
......
...@@ -1110,6 +1110,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) ...@@ -1110,6 +1110,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
return 1; return 1;
fail_register: fail_register:
......
...@@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, ...@@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
*/ */
wait_on_page_writeback(page); wait_on_page_writeback(page);
if (PagePrivate(page)) if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
try_to_release_page(page, GFP_KERNEL); goto out_unlock;
/* /*
* If we succeeded in removing the mapping, set LRU flag * If we succeeded in removing the mapping, set LRU flag
...@@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, ...@@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
* Raced with truncate or failed to remove page from current * Raced with truncate or failed to remove page from current
* address space, unlock and return failure. * address space, unlock and return failure.
*/ */
out_unlock:
unlock_page(page); unlock_page(page);
return 1; return 1;
} }
...@@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ...@@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
while (len) { while (len) {
size_t read_len; size_t read_len;
loff_t pos = sd->pos; loff_t pos = sd->pos, prev_pos = pos;
ret = do_splice_to(in, &pos, pipe, len, flags); ret = do_splice_to(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
...@@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ...@@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
* could get stuck data in the internal pipe: * could get stuck data in the internal pipe:
*/ */
ret = actor(pipe, sd); ret = actor(pipe, sd);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0)) {
sd->pos = prev_pos;
goto out_release; goto out_release;
}
bytes += ret; bytes += ret;
len -= ret; len -= ret;
sd->pos = pos; sd->pos = pos;
if (ret < read_len) if (ret < read_len) {
sd->pos = prev_pos + ret;
goto out_release; goto out_release;
}
} }
done: done:
...@@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ...@@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
ret = splice_direct_to_actor(in, &sd, direct_splice_actor); ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0) if (ret > 0)
*ppos += ret; *ppos = sd.pos;
return ret; return ret;
} }
......
...@@ -55,6 +55,7 @@ enum blktrace_act { ...@@ -55,6 +55,7 @@ enum blktrace_act {
enum blktrace_notify { enum blktrace_notify {
__BLK_TN_PROCESS = 0, /* establish pid/name mapping */ __BLK_TN_PROCESS = 0, /* establish pid/name mapping */
__BLK_TN_TIMESTAMP, /* include system clock */ __BLK_TN_TIMESTAMP, /* include system clock */
__BLK_TN_MESSAGE, /* Character string message */
}; };
...@@ -79,6 +80,7 @@ enum blktrace_notify { ...@@ -79,6 +80,7 @@ enum blktrace_notify {
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_IO_TRACE_MAGIC 0x65617400 #define BLK_IO_TRACE_MAGIC 0x65617400
#define BLK_IO_TRACE_VERSION 0x07 #define BLK_IO_TRACE_VERSION 0x07
...@@ -119,6 +121,7 @@ struct blk_trace { ...@@ -119,6 +121,7 @@ struct blk_trace {
int trace_state; int trace_state;
struct rchan *rchan; struct rchan *rchan;
unsigned long *sequence; unsigned long *sequence;
unsigned char *msg_data;
u16 act_mask; u16 act_mask;
u64 start_lba; u64 start_lba;
u64 end_lba; u64 end_lba;
...@@ -149,7 +152,28 @@ extern void blk_trace_shutdown(struct request_queue *); ...@@ -149,7 +152,28 @@ extern void blk_trace_shutdown(struct request_queue *);
extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
extern int do_blk_trace_setup(struct request_queue *q, extern int do_blk_trace_setup(struct request_queue *q,
char *name, dev_t dev, struct blk_user_trace_setup *buts); char *name, dev_t dev, struct blk_user_trace_setup *buts);
extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
/**
* blk_add_trace_msg - Add a (simple) message to the blktrace stream
* @q: queue the io is for
* @fmt: format to print message in
* args... Variable argument list for format
*
* Description:
* Records a (simple) message onto the blktrace stream.
*
* NOTE: BLK_TN_MAX_MSG characters are output at most.
* NOTE: Can not use 'static inline' due to presence of var args...
*
**/
#define blk_add_trace_msg(q, fmt, ...) \
do { \
struct blk_trace *bt = (q)->blk_trace; \
if (unlikely(bt)) \
__trace_note_message(bt, fmt, ##__VA_ARGS__); \
} while (0)
/* Size of each per-cpu message buffer; __trace_note_message() truncates here */
#define BLK_TN_MAX_MSG 128
/** /**
* blk_add_trace_rq - Add a trace for a request oriented action * blk_add_trace_rq - Add a trace for a request oriented action
...@@ -299,6 +323,8 @@ extern int blk_trace_remove(struct request_queue *q); ...@@ -299,6 +323,8 @@ extern int blk_trace_remove(struct request_queue *q);
#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
#define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY)
#define blk_trace_remove(q) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY)
#define blk_add_trace_msg(q, fmt, ...) do { } while (0)
#endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* CONFIG_BLK_DEV_IO_TRACE */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif #endif
...@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, ...@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
ret = 0; ret = 0;
spliced = 0; spliced = 0;
while (len) { while (len && !spliced) {
ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
if (ret < 0) if (ret < 0)
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment