Commit 89e524c0 authored by Jan Kara's avatar Jan Kara Committed by Jens Axboe

loop: Fix mount(2) failure due to race with LOOP_SET_FD

Commit 33ec3e53 ("loop: Don't change loop device under exclusive
opener") made LOOP_SET_FD ioctl acquire exclusive block device reference
while it updates loop device binding. However this can make perfectly
valid mount(2) fail with EBUSY due to racing LOOP_SET_FD holding
temporarily the exclusive bdev reference in cases like this:

for i in {a..z}{a..z}; do
        dd if=/dev/zero of=$i.image bs=1k count=0 seek=1024
        mkfs.ext2 $i.image
        mkdir mnt$i
done

echo "Run"
for i in {a..z}{a..z}; do
        mount -o loop -t ext2 $i.image mnt$i &
done

Fix the problem by not getting full exclusive bdev reference in
LOOP_SET_FD but instead just mark the bdev as being claimed while we
update the binding information. This just blocks new exclusive openers
instead of failing them with EBUSY thus fixing the problem.

Fixes: 33ec3e53 ("loop: Don't change loop device under exclusive opener")
Cc: stable@vger.kernel.org
Tested-by: default avatarKai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 71d6c505
...@@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, ...@@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct file *file; struct file *file;
struct inode *inode; struct inode *inode;
struct address_space *mapping; struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
int lo_flags = 0; int lo_flags = 0;
int error; int error;
loff_t size; loff_t size;
...@@ -942,11 +943,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, ...@@ -942,11 +943,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
* here to avoid changing device under exclusive owner. * here to avoid changing device under exclusive owner.
*/ */
if (!(mode & FMODE_EXCL)) { if (!(mode & FMODE_EXCL)) {
bdgrab(bdev); claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd); if (IS_ERR(claimed_bdev)) {
if (error) error = PTR_ERR(claimed_bdev);
goto out_putf; goto out_putf;
} }
}
error = mutex_lock_killable(&loop_ctl_mutex); error = mutex_lock_killable(&loop_ctl_mutex);
if (error) if (error)
...@@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, ...@@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&loop_ctl_mutex);
if (partscan) if (partscan)
loop_reread_partitions(lo, bdev); loop_reread_partitions(lo, bdev);
if (!(mode & FMODE_EXCL)) if (claimed_bdev)
blkdev_put(bdev, mode | FMODE_EXCL); bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
return 0; return 0;
out_unlock: out_unlock:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&loop_ctl_mutex);
out_bdev: out_bdev:
if (!(mode & FMODE_EXCL)) if (claimed_bdev)
blkdev_put(bdev, mode | FMODE_EXCL); bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
out_putf: out_putf:
fput(file); fput(file);
out: out:
......
...@@ -1181,8 +1181,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) ...@@ -1181,8 +1181,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
* Pointer to the block device containing @bdev on success, ERR_PTR() * Pointer to the block device containing @bdev on success, ERR_PTR()
* value on failure. * value on failure.
*/ */
static struct block_device *bd_start_claiming(struct block_device *bdev, struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
void *holder)
{ {
struct gendisk *disk; struct gendisk *disk;
struct block_device *whole; struct block_device *whole;
...@@ -1229,6 +1228,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, ...@@ -1229,6 +1228,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(err); return ERR_PTR(err);
} }
} }
EXPORT_SYMBOL(bd_start_claiming);
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
lockdep_assert_held(&bdev_lock);
/* tell others that we're done */
BUG_ON(whole->bd_claiming != holder);
whole->bd_claiming = NULL;
wake_up_bit(&whole->bd_claiming, 0);
}
/**
* bd_finish_claiming - finish claiming of a block device
* @bdev: block device of interest
* @whole: whole block device (returned from bd_start_claiming())
* @holder: holder that has claimed @bdev
*
* Finish exclusive open of a block device. Mark the device as exlusively
* open by the holder and wake up all waiters for exclusive open to finish.
*/
void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
void *holder)
{
spin_lock(&bdev_lock);
BUG_ON(!bd_may_claim(bdev, whole, holder));
/*
* Note that for a whole device bd_holders will be incremented twice,
* and bd_holder will be set to bd_may_claim before being set to holder
*/
whole->bd_holders++;
whole->bd_holder = bd_may_claim;
bdev->bd_holders++;
bdev->bd_holder = holder;
bd_clear_claiming(whole, holder);
spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_finish_claiming);
/**
* bd_abort_claiming - abort claiming of a block device
* @bdev: block device of interest
* @whole: whole block device (returned from bd_start_claiming())
* @holder: holder that has claimed @bdev
*
* Abort claiming of a block device when the exclusive open failed. This can be
* also used when exclusive open is not actually desired and we just needed
* to block other exclusive openers for a while.
*/
void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
void *holder)
{
spin_lock(&bdev_lock);
bd_clear_claiming(whole, holder);
spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
#ifdef CONFIG_SYSFS #ifdef CONFIG_SYSFS
struct bd_holder_disk { struct bd_holder_disk {
...@@ -1698,29 +1753,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) ...@@ -1698,29 +1753,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
/* finish claiming */ /* finish claiming */
mutex_lock(&bdev->bd_mutex); mutex_lock(&bdev->bd_mutex);
spin_lock(&bdev_lock); bd_finish_claiming(bdev, whole, holder);
if (!res) {
BUG_ON(!bd_may_claim(bdev, whole, holder));
/*
* Note that for a whole device bd_holders
* will be incremented twice, and bd_holder
* will be set to bd_may_claim before being
* set to holder
*/
whole->bd_holders++;
whole->bd_holder = bd_may_claim;
bdev->bd_holders++;
bdev->bd_holder = holder;
}
/* tell others that we're done */
BUG_ON(whole->bd_claiming != holder);
whole->bd_claiming = NULL;
wake_up_bit(&whole->bd_claiming, 0);
spin_unlock(&bdev_lock);
/* /*
* Block event polling for write claims if requested. Any * Block event polling for write claims if requested. Any
* write holder makes the write_holder state stick until * write holder makes the write_holder state stick until
......
...@@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, ...@@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
void *holder); void *holder);
extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
void *holder); void *holder);
extern struct block_device *bd_start_claiming(struct block_device *bdev,
void *holder);
extern void bd_finish_claiming(struct block_device *bdev,
struct block_device *whole, void *holder);
extern void bd_abort_claiming(struct block_device *bdev,
struct block_device *whole, void *holder);
extern void blkdev_put(struct block_device *bdev, fmode_t mode); extern void blkdev_put(struct block_device *bdev, fmode_t mode);
extern int __blkdev_reread_part(struct block_device *bdev); extern int __blkdev_reread_part(struct block_device *bdev);
extern int blkdev_reread_part(struct block_device *bdev); extern int blkdev_reread_part(struct block_device *bdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment