Commit 260fa034 authored by NeilBrown's avatar NeilBrown

md: avoid deadlock when dirty buffers during md_stop.

When the last process closes /dev/mdX sync_blockdev will be called so
that all buffers get flushed.
So if it is then opened for the STOP_ARRAY ioctl to be sent there will
be nothing to flush.

However if we open /dev/mdX in order to send the STOP_ARRAY ioctl just
moments before some other process which was writing closes their file
descriptor, then there won't be a 'last close' and the buffers might
not get flushed.

So do_md_stop() calls sync_blockdev().  However at this point it is
holding ->reconfig_mutex.  So if the array is currently 'clean' then
the writes from sync_blockdev() will not complete until the array
can be marked dirty and that won't happen until some other thread
can get ->reconfig_mutex.  So we deadlock.

We need to move the sync_blockdev() call to before we take
->reconfig_mutex.
However then some other thread could open /dev/mdX and write to it
after we call sync_blockdev() and before we actually stop the array.
This can leave dirty data in the page cache which is awkward.

So introduce new flag MD_STILL_CLOSED.  Set it before calling
sync_blockdev(), clear it if anyone does open the file, and abort the
STOP_ARRAY attempt if it gets set before we lock against further
opens.

It is still possible to get problems if you open /dev/mdX, write to
it, then issue the STOP_ARRAY ioctl.  Just don't do that.
Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 7a0a5355
...@@ -5337,8 +5337,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) ...@@ -5337,8 +5337,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
err = -EBUSY; err = -EBUSY;
goto out; goto out;
} }
if (bdev) if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
sync_blockdev(bdev); /* Someone opened the device since we flushed it
* so page cache could be dirty and it is too late
* to flush. So abort
*/
mutex_unlock(&mddev->open_mutex);
return -EBUSY;
}
if (mddev->pers) { if (mddev->pers) {
__md_stop_writes(mddev); __md_stop_writes(mddev);
...@@ -5373,14 +5379,14 @@ static int do_md_stop(struct mddev * mddev, int mode, ...@@ -5373,14 +5379,14 @@ static int do_md_stop(struct mddev * mddev, int mode,
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
return -EBUSY; return -EBUSY;
} }
if (bdev) if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
/* It is possible IO was issued on some other /* Someone opened the device since we flushed it
* open file which was closed before we took ->open_mutex. * so page cache could be dirty and it is too late
* As that was not the last close __blkdev_put will not * to flush. So abort
* have called sync_blockdev, so we must.
*/ */
sync_blockdev(bdev); mutex_unlock(&mddev->open_mutex);
return -EBUSY;
}
if (mddev->pers) { if (mddev->pers) {
if (mddev->ro) if (mddev->ro)
set_disk_ro(disk, 0); set_disk_ro(disk, 0);
...@@ -6417,6 +6423,20 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -6417,6 +6423,20 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
!test_bit(MD_RECOVERY_NEEDED, !test_bit(MD_RECOVERY_NEEDED,
&mddev->flags), &mddev->flags),
msecs_to_jiffies(5000)); msecs_to_jiffies(5000));
if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/* Need to flush page cache, and ensure no-one else opens
* and writes
*/
mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > 1) {
mutex_unlock(&mddev->open_mutex);
err = -EBUSY;
goto abort;
}
set_bit(MD_STILL_CLOSED, &mddev->flags);
mutex_unlock(&mddev->open_mutex);
sync_blockdev(bdev);
}
err = mddev_lock(mddev); err = mddev_lock(mddev);
if (err) { if (err) {
printk(KERN_INFO printk(KERN_INFO
...@@ -6670,6 +6690,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) ...@@ -6670,6 +6690,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
err = 0; err = 0;
atomic_inc(&mddev->openers); atomic_inc(&mddev->openers);
clear_bit(MD_STILL_CLOSED, &mddev->flags);
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
check_disk_change(bdev); check_disk_change(bdev);
......
...@@ -211,6 +211,9 @@ struct mddev { ...@@ -211,6 +211,9 @@ struct mddev {
#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ #define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */
#define MD_UPDATE_SB_FLAGS (1 | 2 | 4) /* If these are set, md_update_sb needed */ #define MD_UPDATE_SB_FLAGS (1 | 2 | 4) /* If these are set, md_update_sb needed */
#define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */ #define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */
#define MD_STILL_CLOSED 4 /* If set, then array has not been opened since
* md_ioctl checked on it.
*/
int suspended; int suspended;
atomic_t active_io; atomic_t active_io;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment