Commit dd553962 authored by Linus Torvalds

Merge tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "This fixes several corner cases for raid5 cache, which is merged into
  this cycle"

* tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/r5cache: disable write back for degraded array
  md/r5cache: shift complex rmw from read path to write path
  md/r5cache: flush data only stripes in r5l_recovery_log()
  md/raid5: move comment of fetch_block to right location
  md/r5cache: read data into orig_page for prexor of cached data
  md/raid5-cache: delete meaningless code
parents 64a172d2 2e38a37f
...@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev) ...@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
if (start_readonly && mddev->ro == 0) if (start_readonly && mddev->ro == 0)
mddev->ro = 2; /* read-only, but switch on first write */ mddev->ro = 2; /* read-only, but switch on first write */
/*
* NOTE: some pers->run(), for example r5l_recovery_log(), wakes
* up mddev->thread. It is important to initialize critical
* resources for mddev->thread BEFORE calling pers->run().
*/
err = pers->run(mddev); err = pers->run(mddev);
if (err) if (err)
pr_warn("md: pers->run() failed ...\n"); pr_warn("md: pers->run() failed ...\n");
......
...@@ -162,6 +162,8 @@ struct r5l_log { ...@@ -162,6 +162,8 @@ struct r5l_log {
/* to submit async io_units, to fulfill ordering of flush */ /* to submit async io_units, to fulfill ordering of flush */
struct work_struct deferred_io_work; struct work_struct deferred_io_work;
/* to disable write back while in degraded mode */
struct work_struct disable_writeback_work;
}; };
/* /*
...@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work) ...@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
r5l_do_submit_io(log, io); r5l_do_submit_io(log, io);
} }
/*
 * Work function behind log->disable_writeback_work: move the journal from
 * write-back to write-through mode.
 *
 * The mode is changed between mddev_suspend() and mddev_resume(). If the
 * journal is already in write-through mode the function returns without
 * touching the array.
 */
static void r5c_disable_writeback_async(struct work_struct *work)
{
	struct r5l_log *log;
	struct mddev *mddev;

	log = container_of(work, struct r5l_log, disable_writeback_work);
	mddev = log->rdev->mddev;

	if (log->r5c_journal_mode != R5C_JOURNAL_MODE_WRITE_THROUGH) {
		pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
			mdname(mddev));

		mddev_suspend(mddev);
		log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
		mddev_resume(mddev);
	}
}
static void r5l_submit_current_io(struct r5l_log *log) static void r5l_submit_current_io(struct r5l_log *log)
{ {
struct r5l_io_unit *io = log->current_io; struct r5l_io_unit *io = log->current_io;
...@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log) ...@@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
next_checkpoint = r5c_calculate_new_cp(conf); next_checkpoint = r5c_calculate_new_cp(conf);
spin_unlock_irq(&log->io_list_lock); spin_unlock_irq(&log->io_list_lock);
BUG_ON(reclaimable < 0);
if (reclaimable == 0 || !write_super) if (reclaimable == 0 || !write_super)
return; return;
...@@ -2062,7 +2077,7 @@ static int ...@@ -2062,7 +2077,7 @@ static int
r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
struct r5l_recovery_ctx *ctx) struct r5l_recovery_ctx *ctx)
{ {
struct stripe_head *sh, *next; struct stripe_head *sh;
struct mddev *mddev = log->rdev->mddev; struct mddev *mddev = log->rdev->mddev;
struct page *page; struct page *page;
sector_t next_checkpoint = MaxSector; sector_t next_checkpoint = MaxSector;
...@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, ...@@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
WARN_ON(list_empty(&ctx->cached_list)); WARN_ON(list_empty(&ctx->cached_list));
list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) { list_for_each_entry(sh, &ctx->cached_list, lru) {
struct r5l_meta_block *mb; struct r5l_meta_block *mb;
int i; int i;
int offset; int offset;
...@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, ...@@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
ctx->pos = write_pos; ctx->pos = write_pos;
ctx->seq += 1; ctx->seq += 1;
next_checkpoint = sh->log_start; next_checkpoint = sh->log_start;
list_del_init(&sh->lru);
raid5_release_stripe(sh);
} }
log->next_checkpoint = next_checkpoint; log->next_checkpoint = next_checkpoint;
__free_page(page); __free_page(page);
return 0; return 0;
} }
/*
 * Write out every stripe still on ctx->cached_list and wait until the
 * stripe cache has no active stripes left.
 *
 * Does nothing when ctx->data_only_stripes is zero. Otherwise the journal
 * mode is set to write-back for the duration of the flush and is set to
 * write-through before returning.
 * NOTE(review): the temporary write-back mode is presumably needed so the
 * stripe handling code treats these stripes as cached data - confirm.
 */
static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
struct mddev *mddev = log->rdev->mddev;
struct r5conf *conf = mddev->private;
struct stripe_head *sh, *next;
/* recovery found no data-only stripes, so there is nothing to flush */
if (ctx->data_only_stripes == 0)
return;
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
/*
 * The _safe iterator is required because every stripe is unlinked from
 * cached_list inside the loop body.
 */
list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
r5c_make_stripe_write_out(sh);
/* flag the stripe for handling before dropping our reference */
set_bit(STRIPE_HANDLE, &sh->state);
list_del_init(&sh->lru);
raid5_release_stripe(sh);
}
/* wake the md thread so the released stripes get handled */
md_wakeup_thread(conf->mddev->thread);
/* reuse conf->wait_for_quiescent in recovery */
/* block until conf->active_stripes has dropped to zero */
wait_event(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
}
static int r5l_recovery_log(struct r5l_log *log) static int r5l_recovery_log(struct r5l_log *log)
{ {
struct mddev *mddev = log->rdev->mddev; struct mddev *mddev = log->rdev->mddev;
...@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log) ...@@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
pos = ctx.pos; pos = ctx.pos;
ctx.seq += 10000; ctx.seq += 10000;
if (ctx.data_only_stripes == 0) {
log->next_checkpoint = ctx.pos;
r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
}
if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0)) if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
pr_debug("md/raid:%s: starting from clean shutdown\n", pr_debug("md/raid:%s: starting from clean shutdown\n",
mdname(mddev)); mdname(mddev));
else { else
pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n", pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
mdname(mddev), ctx.data_only_stripes, mdname(mddev), ctx.data_only_stripes,
ctx.data_parity_stripes); ctx.data_parity_stripes);
if (ctx.data_only_stripes > 0) if (ctx.data_only_stripes == 0) {
if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) { log->next_checkpoint = ctx.pos;
pr_err("md/raid:%s: failed to rewrite stripes to journal\n", r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
mdname(mddev)); ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
return -EIO; } else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
} pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
mdname(mddev));
return -EIO;
} }
log->log_start = ctx.pos; log->log_start = ctx.pos;
log->seq = ctx.seq; log->seq = ctx.seq;
log->last_checkpoint = pos; log->last_checkpoint = pos;
r5l_write_super(log, pos); r5l_write_super(log, pos);
r5c_recovery_flush_data_only_stripes(log, &ctx);
return 0; return 0;
} }
...@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev, ...@@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
val > R5C_JOURNAL_MODE_WRITE_BACK) val > R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL; return -EINVAL;
if (raid5_calc_degraded(conf) > 0 &&
val == R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;
mddev_suspend(mddev); mddev_suspend(mddev);
conf->log->r5c_journal_mode = val; conf->log->r5c_journal_mode = val;
mddev_resume(mddev); mddev_resume(mddev);
...@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf, ...@@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
set_bit(STRIPE_R5C_CACHING, &sh->state); set_bit(STRIPE_R5C_CACHING, &sh->state);
} }
/*
* When run in degraded mode, array is set to write-through mode.
* This check helps drain pending write safely in the transition to
* write-through mode.
*/
if (s->failed) {
r5c_make_stripe_write_out(sh);
return -EAGAIN;
}
for (i = disks; i--; ) { for (i = disks; i--; ) {
dev = &sh->dev[i]; dev = &sh->dev[i];
/* if non-overwrite, use writing-out phase */ /* if non-overwrite, use writing-out phase */
...@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh) ...@@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
struct page *p = sh->dev[i].orig_page; struct page *p = sh->dev[i].orig_page;
sh->dev[i].orig_page = sh->dev[i].page; sh->dev[i].orig_page = sh->dev[i].page;
clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
if (!using_disk_info_extra_page) if (!using_disk_info_extra_page)
put_page(p); put_page(p);
} }
...@@ -2555,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log) ...@@ -2555,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log)
return ret; return ret;
} }
/*
 * r5c_update_on_rdev_error - react to a member device error on behalf of
 * the journal.
 * @mddev: the array whose device failed; mddev->private is its r5conf.
 *
 * When the array has a journal, is degraded (raid5_calc_degraded() > 0)
 * and the journal is in write-back mode, queue log->disable_writeback_work.
 * The mode change itself happens later in the work function, not here.
 * Does nothing when the array has no journal (conf->log is NULL).
 */
void r5c_update_on_rdev_error(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;
	struct r5l_log *log = conf->log;

	if (!log)
		return;

	/*
	 * Use the pointer that was just NULL-checked instead of loading
	 * conf->log a second time, so the check and the use always refer
	 * to the same log.
	 */
	if (raid5_calc_degraded(conf) > 0 &&
	    log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
		schedule_work(&log->disable_writeback_work);
}
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{ {
struct request_queue *q = bdev_get_queue(rdev->bdev); struct request_queue *q = bdev_get_queue(rdev->bdev);
...@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) ...@@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->no_space_stripes_lock); spin_lock_init(&log->no_space_stripes_lock);
INIT_WORK(&log->deferred_io_work, r5l_submit_io_async); INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
INIT_LIST_HEAD(&log->stripe_in_journal_list); INIT_LIST_HEAD(&log->stripe_in_journal_list);
...@@ -2659,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) ...@@ -2659,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
void r5l_exit_log(struct r5l_log *log) void r5l_exit_log(struct r5l_log *log)
{ {
flush_work(&log->disable_writeback_work);
md_unregister_thread(&log->reclaim_thread); md_unregister_thread(&log->reclaim_thread);
mempool_destroy(log->meta_pool); mempool_destroy(log->meta_pool);
bioset_free(log->bs); bioset_free(log->bs);
......
...@@ -556,7 +556,7 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, ...@@ -556,7 +556,7 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
* of the two sections, and some non-in_sync devices may * of the two sections, and some non-in_sync devices may
* be insync in the section most affected by failed devices. * be insync in the section most affected by failed devices.
*/ */
static int calc_degraded(struct r5conf *conf) int raid5_calc_degraded(struct r5conf *conf)
{ {
int degraded, degraded2; int degraded, degraded2;
int i; int i;
...@@ -619,7 +619,7 @@ static int has_failed(struct r5conf *conf) ...@@ -619,7 +619,7 @@ static int has_failed(struct r5conf *conf)
if (conf->mddev->reshape_position == MaxSector) if (conf->mddev->reshape_position == MaxSector)
return conf->mddev->degraded > conf->max_degraded; return conf->mddev->degraded > conf->max_degraded;
degraded = calc_degraded(conf); degraded = raid5_calc_degraded(conf);
if (degraded > conf->max_degraded) if (degraded > conf->max_degraded)
return 1; return 1;
return 0; return 0;
...@@ -1015,7 +1015,17 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -1015,7 +1015,17 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
sh->dev[i].vec.bv_page = sh->dev[i].page;
if (!op_is_write(op) &&
test_bit(R5_InJournal, &sh->dev[i].flags))
/*
* issuing read for a page in journal, this
* must be preparing for prexor in rmw; read
* the data into orig_page
*/
sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
else
sh->dev[i].vec.bv_page = sh->dev[i].page;
bi->bi_vcnt = 1; bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0; bi->bi_io_vec[0].bv_offset = 0;
...@@ -2380,6 +2390,13 @@ static void raid5_end_read_request(struct bio * bi) ...@@ -2380,6 +2390,13 @@ static void raid5_end_read_request(struct bio * bi)
} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
if (test_bit(R5_InJournal, &sh->dev[i].flags))
/*
* end read for a page in journal, this
* must be preparing for prexor in rmw
*/
set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
if (atomic_read(&rdev->read_errors)) if (atomic_read(&rdev->read_errors))
atomic_set(&rdev->read_errors, 0); atomic_set(&rdev->read_errors, 0);
} else { } else {
...@@ -2538,7 +2555,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2538,7 +2555,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
mddev->degraded = calc_degraded(conf); mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
...@@ -2552,6 +2569,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) ...@@ -2552,6 +2569,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
bdevname(rdev->bdev, b), bdevname(rdev->bdev, b),
mdname(mddev), mdname(mddev),
conf->raid_disks - mddev->degraded); conf->raid_disks - mddev->degraded);
r5c_update_on_rdev_error(mddev);
} }
/* /*
...@@ -2880,6 +2898,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) ...@@ -2880,6 +2898,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
return r_sector; return r_sector;
} }
/*
 * handle_stripe_dirtying() and schedule_reconstruction() sometimes have to
 * hold back the pending write (towrite) of one device in a stripe. This
 * helper tells them when to do so.
 *
 * The write is delayed when all of the following hold:
 *
 *   - the write is a non-overwrite (R5_OVERWRITE is clear),
 *   - the target device is missing (R5_Insync is clear), and
 *   - the stripe has data in the journal for other devices
 *     (s->injournal is non-zero).
 *
 * Reading the data for such a non-overwrite would require the complex rmw
 * of the write back cache (prexor with orig_page, then xor with page). To
 * keep the read path simple, the data in the journal is flushed to the RAID
 * disks first, so the complex rmw is handled in the write path
 * (handle_stripe_dirtying).
 */
static inline bool delay_towrite(struct r5dev *dev,
				 struct stripe_head_state *s)
{
	if (test_bit(R5_OVERWRITE, &dev->flags))
		return false;

	if (test_bit(R5_Insync, &dev->flags))
		return false;

	return s->injournal != 0;
}
static void static void
schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand) int rcw, int expand)
...@@ -2900,7 +2942,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -2900,7 +2942,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (dev->towrite) { if (dev->towrite && !delay_towrite(dev, s)) {
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantdrain, &dev->flags); set_bit(R5_Wantdrain, &dev->flags);
if (!expand) if (!expand)
...@@ -3295,13 +3337,6 @@ static int want_replace(struct stripe_head *sh, int disk_idx) ...@@ -3295,13 +3337,6 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
return rv; return rv;
} }
/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
* Returns 1 when no more member devices need to be checked, otherwise returns
* 0 to tell the loop in handle_stripe_fill to continue
*/
static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks) int disk_idx, int disks)
{ {
...@@ -3392,6 +3427,12 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -3392,6 +3427,12 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
return 0; return 0;
} }
/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
* Returns 1 when no more member devices need to be checked, otherwise returns
* 0 to tell the loop in handle_stripe_fill to continue
*/
static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
int disk_idx, int disks) int disk_idx, int disks)
{ {
...@@ -3478,10 +3519,26 @@ static void handle_stripe_fill(struct stripe_head *sh, ...@@ -3478,10 +3519,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
* midst of changing due to a write * midst of changing due to a write
*/ */
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
!sh->reconstruct_state) !sh->reconstruct_state) {
/*
* For degraded stripe with data in journal, do not handle
* read requests yet, instead, flush the stripe to raid
* disks first, this avoids handling complex rmw of write
* back cache (prexor with orig_page, and then xor with
* page) in the read path
*/
if (s->injournal && s->failed) {
if (test_bit(STRIPE_R5C_CACHING, &sh->state))
r5c_make_stripe_write_out(sh);
goto out;
}
for (i = disks; i--; ) for (i = disks; i--; )
if (fetch_block(sh, s, i, disks)) if (fetch_block(sh, s, i, disks))
break; break;
}
out:
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
} }
...@@ -3594,6 +3651,21 @@ static void handle_stripe_clean_event(struct r5conf *conf, ...@@ -3594,6 +3651,21 @@ static void handle_stripe_clean_event(struct r5conf *conf,
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
} }
/*
 * RMW with the write back cache needs an extra page during prexor to hold
 * the old data; that page is dev->orig_page.
 *
 * Report whether @dev already has the data prexor needs, i.e.
 *
 *	R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE)
 */
static inline bool uptodate_for_rmw(struct r5dev *dev)
{
	if (!test_bit(R5_UPTODATE, &dev->flags))
		return false;

	/* not in the journal: being up to date is sufficient */
	if (!test_bit(R5_InJournal, &dev->flags))
		return true;

	/* in the journal: orig_page must be up to date as well */
	return test_bit(R5_OrigPageUPTDODATE, &dev->flags);
}
static int handle_stripe_dirtying(struct r5conf *conf, static int handle_stripe_dirtying(struct r5conf *conf,
struct stripe_head *sh, struct stripe_head *sh,
struct stripe_head_state *s, struct stripe_head_state *s,
...@@ -3622,12 +3694,11 @@ static int handle_stripe_dirtying(struct r5conf *conf, ...@@ -3622,12 +3694,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
} else for (i = disks; i--; ) { } else for (i = disks; i--; ) {
/* would I have to read this buffer for read_modify_write */ /* would I have to read this buffer for read_modify_write */
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx || if (((dev->towrite && !delay_towrite(dev, s)) ||
i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) && test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
!((test_bit(R5_UPTODATE, &dev->flags) && !(uptodate_for_rmw(dev) ||
(!test_bit(R5_InJournal, &dev->flags) ||
dev->page != dev->orig_page)) ||
test_bit(R5_Wantcompute, &dev->flags))) { test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags)) if (test_bit(R5_Insync, &dev->flags))
rmw++; rmw++;
...@@ -3639,7 +3710,6 @@ static int handle_stripe_dirtying(struct r5conf *conf, ...@@ -3639,7 +3710,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
i != sh->pd_idx && i != sh->qd_idx && i != sh->pd_idx && i != sh->qd_idx &&
!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) || !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) { test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags)) if (test_bit(R5_Insync, &dev->flags))
rcw++; rcw++;
...@@ -3689,13 +3759,11 @@ static int handle_stripe_dirtying(struct r5conf *conf, ...@@ -3689,13 +3759,11 @@ static int handle_stripe_dirtying(struct r5conf *conf,
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || if (((dev->towrite && !delay_towrite(dev, s)) ||
i == sh->pd_idx || i == sh->qd_idx || i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) && test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
!((test_bit(R5_UPTODATE, &dev->flags) && !(uptodate_for_rmw(dev) ||
(!test_bit(R5_InJournal, &dev->flags) ||
dev->page != dev->orig_page)) ||
test_bit(R5_Wantcompute, &dev->flags)) && test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) { test_bit(R5_Insync, &dev->flags)) {
if (test_bit(STRIPE_PREREAD_ACTIVE, if (test_bit(STRIPE_PREREAD_ACTIVE,
...@@ -3722,7 +3790,6 @@ static int handle_stripe_dirtying(struct r5conf *conf, ...@@ -3722,7 +3790,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
i != sh->pd_idx && i != sh->qd_idx && i != sh->pd_idx && i != sh->qd_idx &&
!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) || !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) { test_bit(R5_Wantcompute, &dev->flags))) {
rcw++; rcw++;
if (test_bit(R5_Insync, &dev->flags) && if (test_bit(R5_Insync, &dev->flags) &&
...@@ -7025,7 +7092,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7025,7 +7092,7 @@ static int raid5_run(struct mddev *mddev)
/* /*
* 0 for a fully functional array, 1 or 2 for a degraded array. * 0 for a fully functional array, 1 or 2 for a degraded array.
*/ */
mddev->degraded = calc_degraded(conf); mddev->degraded = raid5_calc_degraded(conf);
if (has_failed(conf)) { if (has_failed(conf)) {
pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n", pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n",
...@@ -7272,7 +7339,7 @@ static int raid5_spare_active(struct mddev *mddev) ...@@ -7272,7 +7339,7 @@ static int raid5_spare_active(struct mddev *mddev)
} }
} }
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded = calc_degraded(conf); mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
print_raid5_conf(conf); print_raid5_conf(conf);
return count; return count;
...@@ -7632,7 +7699,7 @@ static int raid5_start_reshape(struct mddev *mddev) ...@@ -7632,7 +7699,7 @@ static int raid5_start_reshape(struct mddev *mddev)
* pre and post number of devices. * pre and post number of devices.
*/ */
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded = calc_degraded(conf); mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
mddev->raid_disks = conf->raid_disks; mddev->raid_disks = conf->raid_disks;
...@@ -7720,7 +7787,7 @@ static void raid5_finish_reshape(struct mddev *mddev) ...@@ -7720,7 +7787,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
} else { } else {
int d; int d;
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
mddev->degraded = calc_degraded(conf); mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
for (d = conf->raid_disks ; for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks; d < conf->raid_disks - mddev->delta_disks;
......
...@@ -322,6 +322,11 @@ enum r5dev_flags { ...@@ -322,6 +322,11 @@ enum r5dev_flags {
* data and parity being written are in the journal * data and parity being written are in the journal
* device * device
*/ */
R5_OrigPageUPTDODATE, /* with write back cache, we read old data into
* dev->orig_page for prexor. When this flag is
* set, orig_page contains latest data in the
* raid disk.
*/
}; };
/* /*
...@@ -753,6 +758,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, ...@@ -753,6 +758,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
extern struct stripe_head * extern struct stripe_head *
raid5_get_active_stripe(struct r5conf *conf, sector_t sector, raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
int previous, int noblock, int noquiesce); int previous, int noblock, int noquiesce);
extern int raid5_calc_degraded(struct r5conf *conf);
extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev); extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
extern void r5l_exit_log(struct r5l_log *log); extern void r5l_exit_log(struct r5l_log *log);
extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh); extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
...@@ -781,4 +787,5 @@ extern void r5c_flush_cache(struct r5conf *conf, int num); ...@@ -781,4 +787,5 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
extern void r5c_check_stripe_cache_usage(struct r5conf *conf); extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
extern void r5c_check_cached_full_stripe(struct r5conf *conf); extern void r5c_check_cached_full_stripe(struct r5conf *conf);
extern struct md_sysfs_entry r5c_journal_mode; extern struct md_sysfs_entry r5c_journal_mode;
extern void r5c_update_on_rdev_error(struct mddev *mddev);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment