Commit 23da64b4 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (28 commits)
  cfq-iosched: add close cooperator code
  cfq-iosched: log responsible 'cfqq' in idle timer arm
  cfq-iosched: tweak kick logic a bit more
  cfq-iosched: no need to save interrupts in cfq_kick_queue()
  brd: fix cacheflushing
  brd: support barriers
  swap: Remove code handling bio_alloc failure with __GFP_WAIT
  gfs2: Remove code handling bio_alloc failure with __GFP_WAIT
  ext4: Remove code handling bio_alloc failure with __GFP_WAIT
  dio: Remove code handling bio_alloc failure with __GFP_WAIT
  block: Remove code handling bio_alloc failure with __GFP_WAIT
  bio: add documentation to bio_alloc()
  splice: add helpers for locking pipe inode
  splice: remove generic_file_splice_write_nolock()
  ocfs2: fix i_mutex locking in ocfs2_splice_to_file()
  splice: fix i_mutex locking in generic_splice_write()
  splice: remove i_mutex locking in splice_from_pipe()
  splice: split up __splice_from_pipe()
  block: fix SG_IO to return a proper error value
  cfq-iosched: don't delay queue kick for a merged request
  ...
parents a23c218b a36e71f9
......@@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers.
iii. Plugging the queue to batch requests in anticipation of opportunities for
merge/sort optimizations
This is just the same as in 2.4 so far, though per-device unplugging
support is anticipated for 2.5. Also with a priority-based i/o scheduler,
such decisions could be based on request priorities.
Plugging is an approach that the current i/o scheduling algorithm resorts to so
that it collects up enough requests in the queue to be able to take
advantage of the sorting/merging logic in the elevator. If the
queue is empty when a request comes in, then it plugs the request queue
(sort of like plugging the bottom of a vessel to get fluid to build up)
(sort of like plugging the bath tub of a vessel to get fluid to build up)
till it fills up with a few more requests, before starting to service
the requests. This provides an opportunity to merge/sort the requests before
passing them down to the device. There are various conditions when the queue is
unplugged (to open up the flow again), either through a scheduled task or
could be on demand. For example wait_on_buffer sets the unplugging going
(by running tq_disk) so the read gets satisfied soon. So in the read case,
the queue gets explicitly unplugged as part of waiting for completion,
in fact all queues get unplugged as a side-effect.
through sync_buffer() running blk_run_address_space(mapping). Or the caller
can do it explicity through blk_unplug(bdev). So in the read case,
the queue gets explicitly unplugged as part of waiting for completion on that
buffer. For page driven IO, the address space ->sync_page() takes care of
doing the blk_run_address_space().
Aside:
This is kind of controversial territory, as it's not clear if plugging is
......@@ -1067,11 +1065,6 @@ Aside:
multi-page bios being queued in one shot, we may not need to wait to merge
a big request from the broken up pieces coming by.
Per-queue granularity unplugging (still a Todo) may help reduce some of the
concerns with just a single tq_disk flush approach. Something like
blk_kick_queue() to unplug a specific queue (right away ?)
or optionally, all queues, is in the plan.
4.4 I/O contexts
I/O contexts provide a dynamically allocated per process data area. They may
be used in I/O schedulers, and in the block layer (could be used for IO statis,
......
This diff is collapsed.
......@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
return -ENXIO;
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = bio_end_empty_barrier;
bio->bi_private = &wait;
bio->bi_bdev = bdev;
......
......@@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
ssize_t ret = queue_var_store(&stats, page, count);
spin_lock_irq(q->queue_lock);
elv_quisce_start(q);
elv_quiesce_start(q);
if (stats)
queue_flag_set(QUEUE_FLAG_IO_STAT, q);
else
queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
elv_quisce_end(q);
elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);
return ret;
......
......@@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q);
int blk_dev_init(void);
void elv_quisce_start(struct request_queue *q);
void elv_quisce_end(struct request_queue *q);
void elv_quiesce_start(struct request_queue *q);
void elv_quiesce_end(struct request_queue *q);
/*
......
This diff is collapsed.
......@@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q)
/*
* Call with queue lock held, interrupts disabled
*/
void elv_quisce_start(struct request_queue *q)
void elv_quiesce_start(struct request_queue *q)
{
queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
......@@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q)
}
}
void elv_quisce_end(struct request_queue *q)
void elv_quiesce_end(struct request_queue *q)
{
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
}
......@@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
* Turn on BYPASS and drain all requests w/ elevator private data
*/
spin_lock_irq(q->queue_lock);
elv_quisce_start(q);
elv_quiesce_start(q);
/*
* Remember old elevator.
......@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
*/
elevator_exit(old_elevator);
spin_lock_irq(q->queue_lock);
elv_quisce_end(q);
elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);
blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
......
......@@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blk_ioc_discard_endio;
bio->bi_bdev = bdev;
......
......@@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
struct bio *bio)
{
int ret = 0;
int r, ret = 0;
/*
* fill in all the output members
......@@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
ret = -EFAULT;
}
blk_rq_unmap_user(bio);
r = blk_rq_unmap_user(bio);
if (!ret)
ret = r;
blk_put_request(rq);
return ret;
......
......@@ -275,8 +275,10 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
if (rw == READ) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else
} else {
flush_dcache_page(page);
copy_to_brd(brd, mem + off, sector, len);
}
kunmap_atomic(mem, KM_USER0);
out:
......@@ -436,6 +438,7 @@ static struct brd_device *brd_alloc(int i)
if (!brd->brd_queue)
goto out_free_dev;
blk_queue_make_request(brd->brd_queue, brd_make_request);
blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
blk_queue_max_sectors(brd->brd_queue, 1024);
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
......
/*
* Copyright (C) 2004 Red Hat UK Ltd.
*
* This file is released under the GPL.
*/
#ifndef DM_BIO_LIST_H
#define DM_BIO_LIST_H
#include <linux/bio.h>
#ifdef CONFIG_BLOCK
struct bio_list {
struct bio *head;
struct bio *tail;
};
static inline int bio_list_empty(const struct bio_list *bl)
{
return bl->head == NULL;
}
static inline void bio_list_init(struct bio_list *bl)
{
bl->head = bl->tail = NULL;
}
#define bio_list_for_each(bio, bl) \
for (bio = (bl)->head; bio; bio = bio->bi_next)
static inline unsigned bio_list_size(const struct bio_list *bl)
{
unsigned sz = 0;
struct bio *bio;
bio_list_for_each(bio, bl)
sz++;
return sz;
}
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = NULL;
if (bl->tail)
bl->tail->bi_next = bio;
else
bl->head = bio;
bl->tail = bio;
}
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = bl->head;
bl->head = bio;
if (!bl->tail)
bl->tail = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->tail)
bl->tail->bi_next = bl2->head;
else
bl->head = bl2->head;
bl->tail = bl2->tail;
}
static inline void bio_list_merge_head(struct bio_list *bl,
struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->head)
bl2->tail->bi_next = bl->head;
else
bl->tail = bl2->tail;
bl->head = bl2->head;
}
static inline struct bio *bio_list_pop(struct bio_list *bl)
{
struct bio *bio = bl->head;
if (bio) {
bl->head = bl->head->bi_next;
if (!bl->head)
bl->tail = NULL;
bio->bi_next = NULL;
}
return bio;
}
static inline struct bio *bio_list_get(struct bio_list *bl)
{
struct bio *bio = bl->head;
bl->head = bl->tail = NULL;
return bio;
}
#endif /* CONFIG_BLOCK */
#endif
......@@ -15,8 +15,6 @@
#include <linux/device-mapper.h>
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "delay"
struct delay_c {
......
......@@ -8,7 +8,6 @@
#include <linux/device-mapper.h>
#include "dm-path-selector.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include "dm-uevent.h"
......
......@@ -5,7 +5,6 @@
* This file is released under the GPL.
*/
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include <linux/init.h>
......
......@@ -14,7 +14,6 @@
#include <linux/vmalloc.h>
#include "dm.h"
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "region hash"
......
......@@ -22,7 +22,6 @@
#include <linux/workqueue.h>
#include "dm-exception-store.h"
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
......
......@@ -6,7 +6,6 @@
*/
#include "dm.h"
#include "dm-bio-list.h"
#include "dm-uevent.h"
#include <linux/init.h>
......
......@@ -35,7 +35,6 @@
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid1.h"
#include "bitmap.h"
......
......@@ -22,7 +22,6 @@
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid10.h"
#include "bitmap.h"
......
......@@ -348,6 +348,24 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
return NULL;
}
/**
* bio_alloc - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
*
* Description:
* bio_alloc will allocate a bio and associated bio_vec array that can hold
* at least @nr_iovecs entries. Allocations will be done from the
* fs_bio_set. Also see @bio_alloc_bioset.
*
* If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
* a bio. This is due to the mempool guarantees. To make this work, callers
* must never allocate more than 1 bio at the time from this pool. Callers
* that need to allocate more than 1 bio must always submit the previously
* allocate bio for IO before attempting to allocate a new one. Failure to
* do so can cause livelocks under memory pressure.
*
**/
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
{
struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
......
......@@ -547,7 +547,7 @@ static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
return err;
}
void do_thaw_all(unsigned long unused)
void do_thaw_all(struct work_struct *work)
{
struct super_block *sb;
char b[BDEVNAME_SIZE];
......@@ -567,6 +567,7 @@ void do_thaw_all(unsigned long unused)
goto restart;
}
spin_unlock(&sb_lock);
kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
......@@ -577,7 +578,13 @@ void do_thaw_all(unsigned long unused)
*/
void emergency_thaw_all(void)
{
pdflush_operation(do_thaw_all, 0);
struct work_struct *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work) {
INIT_WORK(work, do_thaw_all);
schedule_work(work);
}
}
/**
......
......@@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, nr_vecs);
if (bio == NULL)
return -ENOMEM;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
......
......@@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
if (!bio)
return -ENOMEM;
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
......
......@@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
lock_page(page);
bio = bio_alloc(GFP_NOFS, 1);
if (unlikely(!bio)) {
__free_page(page);
return -ENOBUFS;
}
bio->bi_sector = sector * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio_add_page(bio, page, PAGE_SIZE, 0);
......
......@@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode)
spin_lock(&inode_lock);
}
/*
* We rarely want to lock two inodes that do not have a parent/child
* relationship (such as directory, child inode) simultaneously. The
* vast majority of file systems should be able to get along fine
* without this. Do not use these functions except as a last resort.
*/
void inode_double_lock(struct inode *inode1, struct inode *inode2)
{
if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
if (inode1)
mutex_lock(&inode1->i_mutex);
else if (inode2)
mutex_lock(&inode2->i_mutex);
return;
}
if (inode1 < inode2) {
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
}
}
EXPORT_SYMBOL(inode_double_lock);
void inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
if (inode1)
mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex);
}
EXPORT_SYMBOL(inode_double_unlock);
static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
......
......@@ -1912,6 +1912,22 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
return written ? written : ret;
}
static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
struct file *out,
struct splice_desc *sd)
{
int ret;
ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
sd->total_len, 0, NULL);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
return splice_from_pipe_feed(pipe, sd, pipe_to_file);
}
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out,
loff_t *ppos,
......@@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
unsigned int flags)
{
int ret;
struct inode *inode = out->f_path.dentry->d_inode;
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
struct splice_desc sd = {
.total_len = len,
.flags = flags,
.pos = *ppos,
.u.file = out,
};
mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
(unsigned int)len,
out->f_path.dentry->d_name.len,
out->f_path.dentry->d_name.name);
mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
if (pipe->inode)
mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
splice_from_pipe_begin(&sd);
do {
ret = splice_from_pipe_next(pipe, &sd);
if (ret <= 0)
break;
ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
NULL);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0)
mlog_errno(ret);
else {
ret = ocfs2_splice_to_file(pipe, out, &sd);
ocfs2_rw_unlock(inode, 1);
}
mutex_unlock(&inode->i_mutex);
} while (ret > 0);
splice_from_pipe_end(pipe, &sd);
if (pipe->inode)
mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
out_unlock:
ocfs2_rw_unlock(inode, 1);
out:
mutex_unlock(&inode->i_mutex);
if (sd.num_spliced)
ret = sd.num_spliced;
if (ret > 0) {
unsigned long nr_pages;
*ppos += ret;
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/*
* If file or inode is SYNC and we actually wrote some data,
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err;
mutex_lock(&inode->i_mutex);
err = ocfs2_rw_lock(inode, 1);
if (err < 0) {
mlog_errno(err);
} else {
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
ocfs2_rw_unlock(inode, 1);
}
mutex_unlock(&inode->i_mutex);
if (err)
ret = err;
}
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
}
mlog_exit(ret);
return ret;
......
......@@ -37,6 +37,42 @@
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
if (pipe->inode)
mutex_lock_nested(&pipe->inode->i_mutex, subclass);
}
void pipe_lock(struct pipe_inode_info *pipe)
{
/*
* pipe_lock() nests non-pipe inode locks (for writing to a file)
*/
pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);
void pipe_unlock(struct pipe_inode_info *pipe)
{
if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
}
EXPORT_SYMBOL(pipe_unlock);
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);
if (pipe1 < pipe2) {
pipe_lock_nested(pipe1, I_MUTEX_PARENT);
pipe_lock_nested(pipe2, I_MUTEX_CHILD);
} else {
pipe_lock_nested(pipe2, I_MUTEX_CHILD);
pipe_lock_nested(pipe1, I_MUTEX_PARENT);
}
}
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
......@@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
* is considered a noninteractive wait:
*/
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
pipe_unlock(pipe);
schedule();
finish_wait(&pipe->wait, &wait);
if (pipe->inode)
mutex_lock(&pipe->inode->i_mutex);
pipe_lock(pipe);
}
static int
......
This diff is collapsed.
......@@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio)
return bio && bio->bi_io_vec != NULL;
}
/*
* BIO list managment for use by remapping drivers (e.g. DM or MD).
*
* A bio_list anchors a singly-linked list of bios chained through the bi_next
* member of the bio. The bio_list also caches the last list member to allow
* fast access to the tail.
*/
struct bio_list {
struct bio *head;
struct bio *tail;
};
static inline int bio_list_empty(const struct bio_list *bl)
{
return bl->head == NULL;
}
static inline void bio_list_init(struct bio_list *bl)
{
bl->head = bl->tail = NULL;
}
#define bio_list_for_each(bio, bl) \
for (bio = (bl)->head; bio; bio = bio->bi_next)
static inline unsigned bio_list_size(const struct bio_list *bl)
{
unsigned sz = 0;
struct bio *bio;
bio_list_for_each(bio, bl)
sz++;
return sz;
}
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = NULL;
if (bl->tail)
bl->tail->bi_next = bio;
else
bl->head = bio;
bl->tail = bio;
}
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = bl->head;
bl->head = bio;
if (!bl->tail)
bl->tail = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->tail)
bl->tail->bi_next = bl2->head;
else
bl->head = bl2->head;
bl->tail = bl2->tail;
}
static inline void bio_list_merge_head(struct bio_list *bl,
struct bio_list *bl2)
{
if (!bl2->head)
return;
if (bl->head)
bl2->tail->bi_next = bl->head;
else
bl->tail = bl2->tail;
bl->head = bl2->head;
}
static inline struct bio *bio_list_pop(struct bio_list *bl)
{
struct bio *bio = bl->head;
if (bio) {
bl->head = bl->head->bi_next;
if (!bl->head)
bl->tail = NULL;
bio->bi_next = NULL;
}
return bio;
}
static inline struct bio *bio_list_get(struct bio_list *bl)
{
struct bio *bio = bl->head;
bl->head = bl->tail = NULL;
return bio;
}
#if defined(CONFIG_BLK_DEV_INTEGRITY)
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
......
......@@ -87,6 +87,60 @@ struct inodes_stat_t {
*/
#define FMODE_NOCMTIME ((__force fmode_t)2048)
/*
* The below are the various read and write types that we support. Some of
* them include behavioral modifiers that send information down to the
* block layer and IO scheduler. Terminology:
*
* The block layer uses device plugging to defer IO a little bit, in
* the hope that we will see more IO very shortly. This increases
* coalescing of adjacent IO and thus reduces the number of IOs we
* have to send to the device. It also allows for better queuing,
* if the IO isn't mergeable. If the caller is going to be waiting
* for the IO, then he must ensure that the device is unplugged so
* that the IO is dispatched to the driver.
*
* All IO is handled async in Linux. This is fine for background
* writes, but for reads or writes that someone waits for completion
* on, we want to notify the block layer and IO scheduler so that they
* know about it. That allows them to make better scheduling
* decisions. So when the below references 'sync' and 'async', it
* is referencing this priority hint.
*
* With that in mind, the available types are:
*
* READ A normal read operation. Device will be plugged.
* READ_SYNC A synchronous read. Device is not plugged, caller can
* immediately wait on this read without caring about
* unplugging.
* READA Used for read-ahead operations. Lower priority, and the
* block layer could (in theory) choose to ignore this
* request if it runs into resource problems.
* WRITE A normal async write. Device will be plugged.
* SWRITE Like WRITE, but a special case for ll_rw_block() that
* tells it to lock the buffer first. Normally a buffer
* must be locked before doing IO.
* WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
* the hint that someone will be waiting on this IO
* shortly. The device must still be unplugged explicitly,
* WRITE_SYNC_PLUG does not do this as we could be
* submitting more writes before we actually wait on any
* of them.
* WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
* immediately after submission. The write equivalent
* of READ_SYNC.
* WRITE_ODIRECT Special case write for O_DIRECT only.
* SWRITE_SYNC
* SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
* See SWRITE.
* WRITE_BARRIER Like WRITE, but tells the block layer that all
* previously submitted writes must be safely on storage
* before this one is started. Also guarantees that when
* this write is complete, it itself is also safely on
* storage. Prevents reordering of writes on both sides
* of this IO.
*
*/
#define RW_MASK 1
#define RWA_MASK 2
#define READ 0
......@@ -102,6 +156,11 @@ struct inodes_stat_t {
(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
/*
* These aren't really reads or writes, they pass down information about
* parts of device that are now unused by the file system.
*/
#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
......@@ -738,9 +797,6 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA
};
extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
/*
* NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic
......@@ -2150,8 +2206,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
......
......@@ -134,6 +134,11 @@ struct pipe_buf_operations {
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
/* Pipe lock and unlock operations */
void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);
......
......@@ -36,6 +36,8 @@ struct splice_desc {
void *data; /* cookie */
} u;
loff_t pos; /* file position */
size_t num_spliced; /* number of bytes already spliced */
bool need_wakeup; /* need to wake up writer */
};
struct partial_page {
......@@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
splice_actor *);
extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
struct splice_desc *, splice_actor *);
extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
splice_actor *);
extern int splice_from_pipe_next(struct pipe_inode_info *,
struct splice_desc *);
extern void splice_from_pipe_begin(struct splice_desc *);
extern void splice_from_pipe_end(struct pipe_inode_info *,
struct splice_desc *);
extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
struct splice_desc *);
extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
......
......@@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
struct bio *bio;
bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
if (!bio)
return -ENOMEM;
bio->bi_sector = page_off * (PAGE_SIZE >> 9);
bio->bi_bdev = resume_bdev;
bio->bi_end_io = end_swap_bio_read;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment