Commit ed63287d authored by Lidong Zhong, committed by Mike Snitzer

dm raid1: keep issuing IO after leg failure

Currently if there is a leg failure, the bio will be put into the hold
list until userspace does a remove/replace on the leg.  Doing so in a
cluster config (clvmd) is problematic because there may be a temporary
path failure that results in cluster raid1 remove/replace.  Such
recovery takes a long time due to a full resync.

Update dm-raid1 to optionally ignore these failures so bios continue
being issued without interruption.  To enable this feature userspace
must pass "keep_log" when creating the dm-raid1 device.
Signed-off-by: Lidong Zhong <lzhong@suse.com>
Tested-by: Liuhua Wang <lwang@suse.com>
Acked-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent f4ad317a
...@@ -23,8 +23,10 @@ ...@@ -23,8 +23,10 @@
#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */
#define DM_RAID1_HANDLE_ERRORS 0x01 #define DM_RAID1_HANDLE_ERRORS 0x01
#define DM_RAID1_KEEP_LOG 0x02
#define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS)
#define keep_log(p) ((p)->features & DM_RAID1_KEEP_LOG)
static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
...@@ -229,7 +231,7 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type) ...@@ -229,7 +231,7 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
if (m != get_default_mirror(ms)) if (m != get_default_mirror(ms))
goto out; goto out;
if (!ms->in_sync) { if (!ms->in_sync && !keep_log(ms)) {
/* /*
* Better to issue requests to same failing device * Better to issue requests to same failing device
* than to risk returning corrupt data. * than to risk returning corrupt data.
...@@ -370,6 +372,17 @@ static int recover(struct mirror_set *ms, struct dm_region *reg) ...@@ -370,6 +372,17 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
return r; return r;
} }
/*
 * Clear the failure bookkeeping of a mirror set.
 *
 * Resets the set-wide leg_failure flag, then walks every leg and zeroes
 * its error counter and its error_type bitmask.
 */
static void reset_ms_flags(struct mirror_set *ms)
{
	unsigned int leg = 0;

	ms->leg_failure = 0;

	while (leg < ms->nr_mirrors) {
		/* error_count is an atomic counter, so use the atomic setter. */
		atomic_set(&ms->mirror[leg].error_count, 0);
		ms->mirror[leg].error_type = 0;
		leg++;
	}
}
static void do_recovery(struct mirror_set *ms) static void do_recovery(struct mirror_set *ms)
{ {
struct dm_region *reg; struct dm_region *reg;
...@@ -398,6 +411,7 @@ static void do_recovery(struct mirror_set *ms) ...@@ -398,6 +411,7 @@ static void do_recovery(struct mirror_set *ms)
/* the sync is complete */ /* the sync is complete */
dm_table_event(ms->ti->table); dm_table_event(ms->ti->table);
ms->in_sync = 1; ms->in_sync = 1;
reset_ms_flags(ms);
} }
} }
...@@ -759,7 +773,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) ...@@ -759,7 +773,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
dm_rh_delay(ms->rh, bio); dm_rh_delay(ms->rh, bio);
while ((bio = bio_list_pop(&nosync))) { while ((bio = bio_list_pop(&nosync))) {
if (unlikely(ms->leg_failure) && errors_handled(ms)) { if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
spin_lock_irq(&ms->lock); spin_lock_irq(&ms->lock);
bio_list_add(&ms->failures, bio); bio_list_add(&ms->failures, bio);
spin_unlock_irq(&ms->lock); spin_unlock_irq(&ms->lock);
...@@ -803,15 +817,21 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) ...@@ -803,15 +817,21 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
/* /*
* If all the legs are dead, fail the I/O. * If all the legs are dead, fail the I/O.
* If we have been told to handle errors, hold the bio * If the device has failed and keep_log is enabled,
* and wait for userspace to deal with the problem. * fail the I/O.
*
* If we have been told to handle errors, and keep_log
* isn't enabled, hold the bio and wait for userspace to
* deal with the problem.
*
* Otherwise pretend that the I/O succeeded. (This would * Otherwise pretend that the I/O succeeded. (This would
* be wrong if the failed leg returned after reboot and * be wrong if the failed leg returned after reboot and
* got replicated back to the good legs.) * got replicated back to the good legs.)
*/ */
if (!get_valid_mirror(ms))
if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure)))
bio_endio(bio, -EIO); bio_endio(bio, -EIO);
else if (errors_handled(ms)) else if (errors_handled(ms) && !keep_log(ms))
hold_bio(ms, bio); hold_bio(ms, bio);
else else
bio_endio(bio, 0); bio_endio(bio, 0);
...@@ -987,6 +1007,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, ...@@ -987,6 +1007,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
unsigned num_features; unsigned num_features;
struct dm_target *ti = ms->ti; struct dm_target *ti = ms->ti;
char dummy; char dummy;
int i;
*args_used = 0; *args_used = 0;
...@@ -1007,15 +1028,25 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, ...@@ -1007,15 +1028,25 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
return -EINVAL; return -EINVAL;
} }
if (!strcmp("handle_errors", argv[0])) for (i = 0; i < num_features; i++) {
ms->features |= DM_RAID1_HANDLE_ERRORS; if (!strcmp("handle_errors", argv[0]))
else { ms->features |= DM_RAID1_HANDLE_ERRORS;
ti->error = "Unrecognised feature requested"; else if (!strcmp("keep_log", argv[0]))
ms->features |= DM_RAID1_KEEP_LOG;
else {
ti->error = "Unrecognised feature requested";
return -EINVAL;
}
argc--;
argv++;
(*args_used)++;
}
if (!errors_handled(ms) && keep_log(ms)) {
ti->error = "keep_log feature requires the handle_errors feature";
return -EINVAL; return -EINVAL;
} }
(*args_used)++;
return 0; return 0;
} }
...@@ -1029,7 +1060,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, ...@@ -1029,7 +1060,7 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
* log_type is "core" or "disk" * log_type is "core" or "disk"
* #log_params is between 1 and 3 * #log_params is between 1 and 3
* *
* If present, features must be "handle_errors". * If present, supported features are "handle_errors" and "keep_log".
*/ */
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{ {
...@@ -1363,6 +1394,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type, ...@@ -1363,6 +1394,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen) unsigned status_flags, char *result, unsigned maxlen)
{ {
unsigned int m, sz = 0; unsigned int m, sz = 0;
int num_feature_args = 0;
struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror_set *ms = (struct mirror_set *) ti->private;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
char buffer[ms->nr_mirrors + 1]; char buffer[ms->nr_mirrors + 1];
...@@ -1392,8 +1424,17 @@ static void mirror_status(struct dm_target *ti, status_type_t type, ...@@ -1392,8 +1424,17 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
DMEMIT(" %s %llu", ms->mirror[m].dev->name, DMEMIT(" %s %llu", ms->mirror[m].dev->name,
(unsigned long long)ms->mirror[m].offset); (unsigned long long)ms->mirror[m].offset);
if (ms->features & DM_RAID1_HANDLE_ERRORS) num_feature_args += !!errors_handled(ms);
DMEMIT(" 1 handle_errors"); num_feature_args += !!keep_log(ms);
if (num_feature_args) {
DMEMIT(" %d", num_feature_args);
if (errors_handled(ms))
DMEMIT(" handle_errors");
if (keep_log(ms))
DMEMIT(" keep_log");
}
break;
} }
} }
...@@ -1413,7 +1454,7 @@ static int mirror_iterate_devices(struct dm_target *ti, ...@@ -1413,7 +1454,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
static struct target_type mirror_target = { static struct target_type mirror_target = {
.name = "mirror", .name = "mirror",
.version = {1, 13, 2}, .version = {1, 14, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = mirror_ctr, .ctr = mirror_ctr,
.dtr = mirror_dtr, .dtr = mirror_dtr,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment