Commit e65e1535, authored by Miao Xie, committed by Chris Mason

btrfs: fix panic caused by direct IO

btrfs panicked when we wrote more than 64KB of data with direct IO in a single request.

Reproduce steps:
 # mkfs.btrfs /dev/sda5 /dev/sda6
 # mount /dev/sda5 /mnt
 # dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct

Then btrfs panicked:
mapping failed logical 1103155200 bio len 69632 len 12288
------------[ cut here ]------------
kernel BUG at fs/btrfs/volumes.c:3010!
[SNIP]
Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY
RIP: 0010:[<ffffffffa03d1462>]  [<ffffffffa03d1462>] btrfs_map_bio+0x202/0x210 [btrfs]
[SNIP]
Call Trace:
 [<ffffffffa03ab3eb>] __btrfs_submit_bio_done+0x1b/0x20 [btrfs]
 [<ffffffffa03a35ff>] run_one_async_done+0x9f/0xb0 [btrfs]
 [<ffffffffa03d3d20>] run_ordered_completions+0x80/0xc0 [btrfs]
 [<ffffffffa03d45a4>] worker_loop+0x154/0x5f0 [btrfs]
 [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
 [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
 [<ffffffff81083216>] kthread+0x96/0xa0
 [<ffffffff8100cec4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81083180>] ? kthread+0x0/0xa0
 [<ffffffff8100cec0>] ? kernel_thread_helper+0x0/0x10

We fix this problem by splitting bios when we submit bios.
Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 88f794ed
...@@ -5535,13 +5535,21 @@ struct btrfs_dio_private { ...@@ -5535,13 +5535,21 @@ struct btrfs_dio_private {
u64 bytes; u64 bytes;
u32 *csums; u32 *csums;
void *private; void *private;
/* number of bios pending for this dio */
atomic_t pending_bios;
/* IO errors */
int errors;
struct bio *orig_bio;
}; };
static void btrfs_endio_direct_read(struct bio *bio, int err) static void btrfs_endio_direct_read(struct bio *bio, int err)
{ {
struct btrfs_dio_private *dip = bio->bi_private;
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec; struct bio_vec *bvec = bio->bi_io_vec;
struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode; struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start; u64 start;
...@@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, ...@@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
return 0; return 0;
} }
/*
 * Completion handler for one of the bios a direct IO request was split into.
 *
 * @bio: the split bio that finished; its bi_private is the owning
 *       struct btrfs_dio_private
 * @err: non-zero if the IO on @bio failed
 *
 * A failure is logged and recorded in dip->errors.  Whichever completion
 * drops dip->pending_bios to zero finishes dip->orig_bio: with
 * bio_io_error() if any split bio failed, otherwise with bio_endio(.., 0).
 * The reference on @bio is dropped in every case.
 */
static void btrfs_end_dio_bio(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;

	if (err) {
		/*
		 * bi_sector is cast to unsigned long long so the argument
		 * matches its conversion whatever width sector_t has; a
		 * mismatched %lu would also corrupt the arguments after it.
		 * The value is a sector number, not a byte offset, so it is
		 * labelled as such.
		 */
		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
		       "sector %llu len %u err no %d\n",
		       dip->inode->i_ino, bio->bi_rw,
		       (unsigned long long)bio->bi_sector,
		       bio->bi_size, err);
		dip->errors = 1;

		/*
		 * Before the atomic counter can reach zero, make sure
		 * dip->errors is seen as set.
		 */
		smp_mb__before_atomic_dec();
	}

	/* If more bios are still pending for this dio, just exit. */
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors)
		bio_io_error(dip->orig_bio);
	else {
		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
		bio_endio(dip->orig_bio, 0);
	}
out:
	bio_put(bio);
}
/*
 * Allocate a bio for direct IO on @bdev starting at @first_sector.
 *
 * The vector count handed to btrfs_bio_alloc() is whatever
 * bio_get_nr_vecs() reports for @bdev.  Returns the result of
 * btrfs_bio_alloc() unchanged.
 */
static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
				       u64 first_sector, gfp_t gfp_flags)
{
	return btrfs_bio_alloc(bdev, first_sector, bio_get_nr_vecs(bdev),
			       gfp_flags);
}
/*
 * Submit one direct IO bio.
 *
 * @bio:         bio to submit
 * @inode:       inode the IO belongs to
 * @rw:          request flags; REQ_WRITE selects the write path
 * @file_offset: file offset passed on to the checksum helpers
 * @skip_sum:    non-zero to bypass checksum work entirely
 * @csums:       checksum buffer passed to btrfs_lookup_bio_sums_dio()
 *               on the non-write path
 *
 * An extra reference is held on @bio for the duration of the call.
 * Writes that need checksums go through btrfs_wq_submit_bio(); everything
 * else is handed to btrfs_map_bio(), after a checksum lookup unless
 * @skip_sum is set.
 *
 * Returns the status of whichever step ran last: btrfs_bio_wq_end_io(),
 * btrfs_wq_submit_bio() or btrfs_map_bio().
 */
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
					 u32 *csums)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int is_write = rw & REQ_WRITE;
	int ret;

	bio_get(bio);

	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
	if (ret == 0) {
		if (is_write && !skip_sum) {
			/* checksummed write: submit via btrfs_wq_submit_bio() */
			ret = btrfs_wq_submit_bio(root->fs_info,
						  inode, rw, bio, 0, 0,
						  file_offset,
						  __btrfs_submit_bio_start_direct_io,
						  __btrfs_submit_bio_done);
		} else {
			if (!skip_sum)
				btrfs_lookup_bio_sums_dio(root, inode, bio,
							  file_offset, csums);
			ret = btrfs_map_bio(root, rw, bio, 0, 1);
		}
	}

	bio_put(bio);
	return ret;
}
/*
 * Split dip->orig_bio into smaller bios and submit them one by one.
 *
 * The bio_vecs of the original bio are copied into a freshly allocated bio
 * until adding the next one would exceed map_length (as reported by
 * btrfs_map_block() for the bio's start sector) or bio_add_page() cannot
 * take the whole vector.  At that point the bio is submitted and a new one
 * is started at the next sector.
 *
 * Returns -ENOMEM if the first bio cannot be allocated and -EIO if the
 * first btrfs_map_block() call fails.  Any later failure sets dip->errors
 * and is reported through bio_io_error() on the original bio once
 * dip->pending_bios reaches zero; the function itself then returns 0.
 */
static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int skip_sum)
{
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
struct bio_vec *bvec = orig_bio->bi_io_vec;
u64 start_sector = orig_bio->bi_sector;
u64 file_offset = dip->logical_offset;
/* bytes added to the bio currently being built */
u64 submit_len = 0;
u64 map_length;
/* number of bio_vecs added to the bio currently being built */
int nr_pages = 0;
u32 *csums = dip->csums;
int ret = 0;
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
return -ENOMEM;
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
/* account for the first bio */
atomic_inc(&dip->pending_bios);
/* ask how many bytes can be mapped starting at start_sector */
map_length = orig_bio->bi_size;
ret = btrfs_map_block(map_tree, READ, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(bio);
return -EIO;
}
/* walk every bio_vec of the original bio */
while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
/*
 * The current bio is full when this vector would push it past
 * map_length or when bio_add_page() cannot take all of it.
 * bvec is not advanced here, so the same vector is retried
 * against the next bio.
 */
if (unlikely(map_length < submit_len + bvec->bv_len ||
bio_add_page(bio, bvec->bv_page, bvec->bv_len,
bvec->bv_offset) < bvec->bv_len)) {
/*
 * inc the count before we submit the bio so
 * we know the end IO handler won't happen before
 * we inc the count. Otherwise, the dip might get freed
 * before we're done setting it up
 */
atomic_inc(&dip->pending_bios);
ret = __btrfs_submit_dio_bio(bio, inode, rw,
file_offset, skip_sum,
csums);
if (ret) {
bio_put(bio);
/* undo the increment taken just above */
atomic_dec(&dip->pending_bios);
goto out_err;
}
/*
 * Skip the checksum slots of the vectors just submitted
 * (presumably one u32 checksum per bio_vec; confirm
 * against the checksum helpers).
 */
if (!skip_sum)
csums = csums + nr_pages;
start_sector += submit_len >> 9;
file_offset += submit_len;
submit_len = 0;
nr_pages = 0;
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
/* recompute the mappable length for the new start sector */
map_length = orig_bio->bi_size;
ret = btrfs_map_block(map_tree, READ, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(bio);
goto out_err;
}
} else {
/* the vector fit: account for it and move on */
submit_len += bvec->bv_len;
nr_pages ++;
bvec++;
}
}
/* submit the last, possibly partially filled, bio */
ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
csums);
if (!ret)
return 0;
bio_put(bio);
out_err:
dip->errors = 1;
/*
 * Before the atomic counter can reach zero, we must
 * make sure dip->errors is seen as set.
 */
smp_mb__before_atomic_dec();
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
/* the error is reported by completing orig_bio, so it is not returned */
return 0;
}
static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
loff_t file_offset) loff_t file_offset)
{ {
...@@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ...@@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
dip->disk_bytenr = (u64)bio->bi_sector << 9; dip->disk_bytenr = (u64)bio->bi_sector << 9;
bio->bi_private = dip; bio->bi_private = dip;
dip->errors = 0;
dip->orig_bio = bio;
atomic_set(&dip->pending_bios, 0);
if (write) if (write)
bio->bi_end_io = btrfs_endio_direct_write; bio->bi_end_io = btrfs_endio_direct_write;
else else
bio->bi_end_io = btrfs_endio_direct_read; bio->bi_end_io = btrfs_endio_direct_read;
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (ret) if (!ret)
goto free_ordered;
if (write && !skip_sum) {
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, 0, 0,
dip->logical_offset,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
if (ret)
goto free_ordered;
return; return;
} else if (!skip_sum)
btrfs_lookup_bio_sums_dio(root, inode, bio,
dip->logical_offset, dip->csums);
ret = btrfs_map_bio(root, rw, bio, 0, 1);
if (ret)
goto free_ordered;
return;
free_ordered: free_ordered:
/* /*
* If this is a write, we need to clean up the reserved space and kill * If this is a write, we need to clean up the reserved space and kill
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment