[PATCH] md: fix potential memalloc deadlock in md

If a GFP_KERNEL allocation is attempted in md while the mddev_lock is held, it is possible for a deadlock to eventuate. This happens if the array was marked 'clean', and the memalloc triggers a write-out to the md device. For the writeout to succeed, the array must be marked 'dirty', and that requires getting the mddev_lock. So, before attempting a GFP_KERNEL allocation while holding the lock, make sure the array is marked 'dirty' (unless it is currently read-only). Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

[PATCH] md: fix potential memalloc deadlock in md
If a GFP_KERNEL allocation is attempted in md while the mddev_lock is held, it is possible for a deadlock to eventuate. This happens if the array was marked 'clean', and the memalloc triggers a write-out to the md device. For the writeout to succeed, the array must be marked 'dirty', and that requires getting the mddev_lock. So, before attempting a GFP_KERNEL allocation while holding the lock, make sure the array is marked 'dirty' (unless it is currently read-only). Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2a2275d6 · NeilBrown · Linus Torvalds · a0ad13ef · 2a2275d6 · 2a2275d6
Commit 2a2275d6 authored Jan 26, 2007 by NeilBrown Committed by Linus Torvalds Jan 26, 2007
Showing with 35 additions and 1 deletion

drivers/md/md.c drivers/md/md.c +29 -0

drivers/md/raid1.c drivers/md/raid1.c +2 -0

drivers/md/raid5.c drivers/md/raid5.c +3 -0

include/linux/raid/md.h include/linux/raid/md.h +1 -1

No files found.
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3564,6 +3564,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
 	char *ptr, *buf = NULL;
 	int err = -ENOMEM;

+	md_allow_write(mddev);
+
 	file = kmalloc(sizeof(*file), GFP_KERNEL);
 	if (!file)
 		goto out;
@@ -5032,6 +5034,33 @@ void md_write_end(mddev_t *mddev)
 	}
 }

+/* md_allow_write(mddev)
+ * Calling this ensures that the array is marked 'active' so that writes
+ * may proceed without blocking.  It is important to call this before
+ * attempting a GFP_KERNEL allocation while holding the mddev lock.
+ * Must be called with mddev_lock held.
+ */
+void md_allow_write(mddev_t *mddev)
+{
+	if (!mddev->pers)
+		return;
+	if (mddev->ro)
+		return;
+
+	spin_lock_irq(&mddev->write_lock);
+	if (mddev->in_sync) {
+		mddev->in_sync = 0;
+		set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+		if (mddev->safemode_delay &&
+		    mddev->safemode == 0)
+			mddev->safemode = 1;
+		spin_unlock_irq(&mddev->write_lock);
+		md_update_sb(mddev, 0);
+	} else
+		spin_unlock_irq(&mddev->write_lock);
+}
+EXPORT_SYMBOL_GPL(md_allow_write);
+
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

 #define SYNC_MARKS	10

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2104,6 +2104,8 @@ static int raid1_reshape(mddev_t *mddev)
 		return -EINVAL;
 	}

+	md_allow_write(mddev);
+
 	raid_disks = mddev->raid_disks + mddev->delta_disks;

 	if (raid_disks < conf->raid_disks) {

--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -405,6 +405,8 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
 	if (newsize <= conf->pool_size)
 		return 0; /* never bother to shrink */

+	md_allow_write(conf->mddev);
+
 	/* Step 1 */
 	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
 			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
@@ -3250,6 +3252,7 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
 		else
 			break;
 	}
+	md_allow_write(mddev);
 	while (new > conf->max_nr_stripes) {
 		if (grow_one_stripe(conf))
 			conf->max_nr_stripes++;

--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -94,7 +94,7 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
 			struct page *page, int rw);
 extern void md_do_sync(mddev_t *mddev);
 extern void md_new_event(mddev_t *mddev);
-
+extern void md_allow_write(mddev_t *mddev);

 #endif /* CONFIG_MD */
 #endif