Commit 1737ddec authored by Jens Axboe's avatar Jens Axboe Committed by Linus Torvalds

[PATCH] bio_copy_user() cleanups and fixes

blk_rq_map_user() is a bit of a hack currently, since it drops back to
kmalloc() if bio_map_user() fails.  This is unfortunate since it means we
do no real segment or size checking (and the request segment counts contain
crap, already found one bug in a scsi lld).  It's also pretty nasty for >
PAGE_SIZE requests, as we attempt to do higher order page allocations.
Even worse still, ide-cd will drop back to PIO for non-sg/bio requests. 
All in all, very suboptimal.

This patch adds bio_copy_user() which simply sets up a bio with kernel
pages and copies data as needed for reads and writes.  It also changes
bio_map_user() to return an error pointer like bio_copy_user(), so we can
return something sane to the user instead of always -ENOMEM.
Signed-off-by: default avatarJens Axboe <axboe@suse.de>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 8e7e3e3e
......@@ -1823,50 +1823,43 @@ EXPORT_SYMBOL(blk_insert_request);
struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf,
unsigned int len)
{
struct request *rq = NULL;
char *buf = NULL;
unsigned long uaddr;
struct request *rq;
struct bio *bio;
int ret;
if (len > (q->max_sectors << 9))
return ERR_PTR(-EINVAL);
if ((!len && ubuf) || (len && !ubuf))
return ERR_PTR(-EINVAL);
rq = blk_get_request(q, rw, __GFP_WAIT);
if (!rq)
return ERR_PTR(-ENOMEM);
bio = bio_map_user(q, NULL, (unsigned long) ubuf, len, rw == READ);
if (!bio) {
int bytes = (len + 511) & ~511;
buf = kmalloc(bytes, q->bounce_gfp | GFP_USER);
if (!buf) {
ret = -ENOMEM;
goto fault;
}
if (rw == WRITE) {
if (copy_from_user(buf, ubuf, len)) {
ret = -EFAULT;
goto fault;
}
} else
memset(buf, 0, len);
}
/*
* if alignment requirement is satisfied, map in user pages for
* direct dma. else, set up kernel bounce buffers
*/
uaddr = (unsigned long) ubuf;
if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
bio = bio_map_user(q, NULL, uaddr, len, rw == READ);
else
bio = bio_copy_user(q, uaddr, len, rw == READ);
rq->bio = rq->biotail = bio;
if (rq->bio)
if (!IS_ERR(bio)) {
rq->bio = rq->biotail = bio;
blk_rq_bio_prep(q, rq, bio);
rq->buffer = rq->data = buf;
rq->data_len = len;
return rq;
fault:
if (buf)
kfree(buf);
if (bio)
bio_unmap_user(bio, 1);
if (rq)
blk_put_request(rq);
rq->buffer = rq->data = NULL;
rq->data_len = len;
return rq;
}
return ERR_PTR(ret);
/*
* bio is the err-ptr
*/
blk_put_request(rq);
return (struct request *) bio;
}
EXPORT_SYMBOL(blk_rq_map_user);
......@@ -1880,18 +1873,15 @@ EXPORT_SYMBOL(blk_rq_map_user);
* Description:
* Unmap a request previously mapped by blk_rq_map_user().
*/
int blk_rq_unmap_user(struct request *rq, void __user *ubuf, struct bio *bio,
unsigned int ulen)
int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
{
const int read = rq_data_dir(rq) == READ;
int ret = 0;
if (bio)
bio_unmap_user(bio, read);
if (rq->buffer) {
if (read && copy_to_user(ubuf, rq->buffer, ulen))
ret = -EFAULT;
kfree(rq->buffer);
if (bio) {
if (bio_flagged(bio, BIO_USER_MAPPED))
bio_unmap_user(bio);
else
ret = bio_uncopy_user(bio);
}
blk_put_request(rq);
......
......@@ -211,7 +211,7 @@ static int sg_io(request_queue_t *q, struct gendisk *bd_disk,
hdr->sb_len_wr = len;
}
if (blk_rq_unmap_user(rq, hdr->dxferp, bio, hdr->dxfer_len))
if (blk_rq_unmap_user(rq, bio, hdr->dxfer_len))
return -EFAULT;
/* may not have succeeded, but output values written to control
......
......@@ -2015,7 +2015,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
cdi->last_sense = s->sense_key;
}
if (blk_rq_unmap_user(rq, ubuf, bio, len))
if (blk_rq_unmap_user(rq, bio, len))
ret = -EFAULT;
if (ret)
......
......@@ -1959,13 +1959,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
* sg request
*/
if (rq->bio) {
if (rq->data_len & 3) {
printk("%s: block pc not aligned, len=%d\n", drive->name, rq->data_len);
cdrom_end_request(drive, 0);
return ide_stopped;
}
info->dma = drive->using_dma;
int mask = drive->queue->dma_alignment;
unsigned long addr = (unsigned long) page_address(bio_page(rq->bio));
info->cmd = rq_data_dir(rq);
info->dma = drive->using_dma;
/*
* check if dma is safe
*/
if ((rq->data_len & mask) || (addr & mask))
info->dma = 0;
}
/* Start sending the command to the drive. */
......@@ -3133,7 +3137,7 @@ int ide_cdrom_setup (ide_drive_t *drive)
int nslots;
blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn);
blk_queue_dma_alignment(drive->queue, 3);
blk_queue_dma_alignment(drive->queue, 31);
drive->queue->unplug_delay = (1 * HZ) / 1000;
if (!drive->queue->unplug_delay)
drive->queue->unplug_delay = 1;
......
......@@ -376,6 +376,115 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
len, offset);
}
/**
* bio_uncopy_user - finish previously mapped bio
* @bio: bio being terminated
*
* Free pages allocated from bio_copy_user() and write back data
* to user space in case of a read.
*/
int bio_uncopy_user(struct bio *bio)
{
struct bio_vec *bvec;
int i, ret = 0;
if (bio_data_dir(bio) == READ) {
char *uaddr = bio->bi_private;
__bio_for_each_segment(bvec, bio, i, 0) {
char *addr = page_address(bvec->bv_page);
if (!ret && copy_to_user(uaddr, addr, bvec->bv_len))
ret = -EFAULT;
__free_page(bvec->bv_page);
uaddr += bvec->bv_len;
}
}
bio_put(bio);
return ret;
}
/**
* bio_copy_user - copy user data to bio
* @q: destination block queue
* @uaddr: start of user address
* @len: length in bytes
* @write_to_vm: bool indicating writing to pages or not
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
* call bio_uncopy_user() on io completion.
*/
struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr,
unsigned int len, int write_to_vm)
{
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
struct bio_vec *bvec;
struct page *page;
struct bio *bio;
int i, ret;
bio = bio_alloc(GFP_KERNEL, end - start);
if (!bio)
return ERR_PTR(-ENOMEM);
ret = 0;
while (len) {
unsigned int bytes = PAGE_SIZE;
if (bytes > len)
bytes = len;
page = alloc_page(q->bounce_gfp | GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
break;
}
if (__bio_add_page(q, bio, page, bytes, 0) < bytes) {
ret = -EINVAL;
break;
}
len -= bytes;
}
/*
* success
*/
if (!ret) {
if (!write_to_vm) {
bio->bi_rw |= (1 << BIO_RW);
/*
* for a write, copy in data to kernel pages
*/
ret = -EFAULT;
bio_for_each_segment(bvec, bio, i) {
char *addr = page_address(bvec->bv_page);
if (copy_from_user(addr, (char *) uaddr, bvec->bv_len))
goto cleanup;
}
}
bio->bi_private = (void *) uaddr;
return bio;
}
/*
* cleanup
*/
cleanup:
bio_for_each_segment(bvec, bio, i)
__free_page(bvec->bv_page);
bio_put(bio);
return ERR_PTR(ret);
}
static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len,
int write_to_vm)
......@@ -392,12 +501,13 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* size for now, in the future we can relax this restriction
*/
if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
return NULL;
return ERR_PTR(-EINVAL);
bio = bio_alloc(GFP_KERNEL, nr_pages);
if (!bio)
return NULL;
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out;
......@@ -446,11 +556,12 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
if (!write_to_vm)
bio->bi_rw |= (1 << BIO_RW);
bio->bi_flags |= (1 << BIO_USER_MAPPED);
return bio;
out:
kfree(pages);
bio_put(bio);
return NULL;
return ERR_PTR(ret);
}
/**
......@@ -461,7 +572,7 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* @write_to_vm: bool indicating writing to pages or not
*
* Map the user space address into a bio suitable for io to a block
* device.
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, int write_to_vm)
......@@ -470,26 +581,29 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
if (bio) {
/*
* subtle -- if __bio_map_user() ended up bouncing a bio,
* it would normally disappear when its bi_end_io is run.
* however, we need it for the unmap, so grab an extra
* reference to it
*/
bio_get(bio);
if (IS_ERR(bio))
return bio;
if (bio->bi_size < len) {
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio, 0);
return NULL;
}
}
/*
* subtle -- if __bio_map_user() ended up bouncing a bio,
* it would normally disappear when its bi_end_io is run.
* however, we need it for the unmap, so grab an extra
* reference to it
*/
bio_get(bio);
return bio;
if (bio->bi_size == len)
return bio;
/*
* don't support partial mappings
*/
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio);
return ERR_PTR(-EINVAL);
}
static void __bio_unmap_user(struct bio *bio, int write_to_vm)
static void __bio_unmap_user(struct bio *bio)
{
struct bio_vec *bvec;
int i;
......@@ -510,7 +624,7 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
* make sure we dirty pages we wrote to
*/
__bio_for_each_segment(bvec, bio, i, 0) {
if (write_to_vm)
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
page_cache_release(bvec->bv_page);
......@@ -522,17 +636,15 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
/**
* bio_unmap_user - unmap a bio
* @bio: the bio being unmapped
* @write_to_vm: bool indicating whether pages were written to
*
* Unmap a bio previously mapped by bio_map_user(). The @write_to_vm
* must be the same as passed into bio_map_user(). Must be called with
* Unmap a bio previously mapped by bio_map_user(). Must be called with
* a process context.
*
* bio_unmap_user() may sleep.
*/
void bio_unmap_user(struct bio *bio, int write_to_vm)
void bio_unmap_user(struct bio *bio)
{
__bio_unmap_user(bio, write_to_vm);
__bio_unmap_user(bio);
bio_put(bio);
}
......@@ -863,3 +975,5 @@ EXPORT_SYMBOL(bio_unmap_user);
EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split);
EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
......@@ -120,6 +120,7 @@ struct bio {
#define BIO_SEG_VALID 3 /* nr_hw_seg valid */
#define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
......@@ -264,9 +265,11 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_get_nr_vecs(struct block_device *);
extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
unsigned long, unsigned int, int);
extern void bio_unmap_user(struct bio *, int);
extern void bio_unmap_user(struct bio *);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
extern int bio_uncopy_user(struct bio *);
#ifdef CONFIG_HIGHMEM
/*
......
......@@ -524,7 +524,7 @@ extern void __blk_stop_queue(request_queue_t *q);
extern void blk_run_queue(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
extern int blk_rq_unmap_user(struct request *, void __user *, struct bio *, unsigned int);
extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment