Commit 5fc01ab6 authored by Artem Bityutskiy's avatar Artem Bityutskiy

UBI: preserve corrupted PEBs

Currently UBI erases all corrupted eraseblocks, irrespectively of the nature
of corruption: corruption due to power cuts and non-power cut corruption.
The former case is OK, but the latter is not, because UBI may destroy
potentially important data.

With this patch, during scanning, when UBI hits a PEB with corrupted VID
header, it checks whether this PEB contains only 0xFF data. If yes, it is
safe to erase this PEB and it is put to the 'erase' list. If not, this may
be important data and it is better to avoid erasing this PEB. Instead,
UBI puts it to the corr list and moves out of the pool of available PEB.
IOW, UBI preserves this PEB.

Such corrupted PEB lessen the amount of available PEBs. So the more of them
we accumulate, the less PEBs are available. The maximum amount of non-power
cut corrupted PEBs is 8.

This patch is a response to UBIFS problem where reporter
(Matthew L. Creech <mlcreech@gmail.com>) observes that UBIFS index points
to an unmapped LEB. The theory is that corresponding PEB somehow got
corrupted and UBI wiped it. This patch (actually a series of patches)
tries to make sure such PEBs are preserved - this would make it is easier
to analyze the corruption.
Signed-off-by: default avatarArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
parent feeba4b8
...@@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi) ...@@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
ubi->bad_peb_count = si->bad_peb_count; ubi->bad_peb_count = si->bad_peb_count;
ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
ubi->corr_peb_count = si->corr_peb_count;
ubi->max_ec = si->max_ec; ubi->max_ec = si->max_ec;
ubi->mean_ec = si->mean_ec; ubi->mean_ec = si->mean_ec;
ubi_msg("max. sequence number: %llu", si->max_sqnum); ubi_msg("max. sequence number: %llu", si->max_sqnum);
...@@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ...@@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20);
ubi_msg("number of good PEBs: %d", ubi->good_peb_count); ubi_msg("number of good PEBs: %d", ubi->good_peb_count);
ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count);
ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count);
ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots);
ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD);
ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
......
...@@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi, ...@@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi,
ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d," ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
" need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
if (ubi->corr_peb_count)
ubi_warn("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
} }
/** /**
...@@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ...@@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
if (ubi->avail_pebs < EBA_RESERVED_PEBS) { if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
ubi_err("no enough physical eraseblocks (%d, need %d)", ubi_err("no enough physical eraseblocks (%d, need %d)",
ubi->avail_pebs, EBA_RESERVED_PEBS); ubi->avail_pebs, EBA_RESERVED_PEBS);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
err = -ENOSPC; err = -ENOSPC;
goto out_free; goto out_free;
} }
......
...@@ -706,8 +706,8 @@ int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, ...@@ -706,8 +706,8 @@ int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
struct ubi_scan_info *si) struct ubi_scan_info *si)
{ {
int err = 0, i; int err = 0;
struct ubi_scan_leb *seb; struct ubi_scan_leb *seb, *tmp_seb;
if (!list_empty(&si->free)) { if (!list_empty(&si->free)) {
seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
...@@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, ...@@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
return seb; return seb;
} }
for (i = 0; i < 2; i++) { /*
struct list_head *head; * We try to erase the first physical eraseblock from the erase list
struct ubi_scan_leb *tmp_seb; * and pick it if we succeed, or try to erase the next one if not. And
* so forth. We don't want to take care about bad eraseblocks here -
if (i == 0) * they'll be handled later.
head = &si->erase; */
else list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
head = &si->corr; if (seb->ec == UBI_SCAN_UNKNOWN_EC)
seb->ec = si->mean_ec;
/*
* We try to erase the first physical eraseblock from the @head
* list and pick it if we succeed, or try to erase the
* next one if not. And so forth. We don't want to take care
* about bad eraseblocks here - they'll be handled later.
*/
list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
if (seb->ec == UBI_SCAN_UNKNOWN_EC)
seb->ec = si->mean_ec;
err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
if (err) if (err)
continue; continue;
seb->ec += 1; seb->ec += 1;
list_del(&seb->u.list); list_del(&seb->u.list);
dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
return seb; return seb;
}
} }
ubi_err("no eraseblocks found"); ubi_err("no free eraseblocks");
return ERR_PTR(-ENOSPC); return ERR_PTR(-ENOSPC);
} }
......
...@@ -361,6 +361,8 @@ struct ubi_wl_entry; ...@@ -361,6 +361,8 @@ struct ubi_wl_entry;
* @peb_size: physical eraseblock size * @peb_size: physical eraseblock size
* @bad_peb_count: count of bad physical eraseblocks * @bad_peb_count: count of bad physical eraseblocks
* @good_peb_count: count of good physical eraseblocks * @good_peb_count: count of good physical eraseblocks
* @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
* used by UBI)
* @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
* @max_erroneous: maximum allowed amount of erroneous physical eraseblocks * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
* @min_io_size: minimal input/output unit size of the underlying MTD device * @min_io_size: minimal input/output unit size of the underlying MTD device
...@@ -447,6 +449,7 @@ struct ubi_device { ...@@ -447,6 +449,7 @@ struct ubi_device {
int peb_size; int peb_size;
int bad_peb_count; int bad_peb_count;
int good_peb_count; int good_peb_count;
int corr_peb_count;
int erroneous_peb_count; int erroneous_peb_count;
int max_erroneous; int max_erroneous;
int min_io_size; int min_io_size;
......
...@@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) ...@@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
/* Reserve physical eraseblocks */ /* Reserve physical eraseblocks */
if (vol->reserved_pebs > ubi->avail_pebs) { if (vol->reserved_pebs > ubi->avail_pebs) {
dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
if (ubi->corr_peb_count)
dbg_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
err = -ENOSPC; err = -ENOSPC;
goto out_unlock; goto out_unlock;
} }
...@@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) ...@@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
if (pebs > ubi->avail_pebs) { if (pebs > ubi->avail_pebs) {
dbg_err("not enough PEBs: requested %d, available %d", dbg_err("not enough PEBs: requested %d, available %d",
pebs, ubi->avail_pebs); pebs, ubi->avail_pebs);
if (ubi->corr_peb_count)
dbg_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
spin_unlock(&ubi->volumes_lock); spin_unlock(&ubi->volumes_lock);
err = -ENOSPC; err = -ENOSPC;
goto out_free; goto out_free;
......
...@@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, ...@@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
ubi->vol_count += 1; ubi->vol_count += 1;
vol->ubi = ubi; vol->ubi = ubi;
if (reserved_pebs > ubi->avail_pebs) if (reserved_pebs > ubi->avail_pebs) {
ubi_err("not enough PEBs, required %d, available %d", ubi_err("not enough PEBs, required %d, available %d",
reserved_pebs, ubi->avail_pebs); reserved_pebs, ubi->avail_pebs);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
}
ubi->rsvd_pebs += reserved_pebs; ubi->rsvd_pebs += reserved_pebs;
ubi->avail_pebs -= reserved_pebs; ubi->avail_pebs -= reserved_pebs;
...@@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) ...@@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
return PTR_ERR(ubi->vtbl); return PTR_ERR(ubi->vtbl);
} }
ubi->avail_pebs = ubi->good_peb_count; ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
/* /*
* The layout volume is OK, initialize the corresponding in-RAM data * The layout volume is OK, initialize the corresponding in-RAM data
......
...@@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ...@@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
ubi->lookuptbl[e->pnum] = e; ubi->lookuptbl[e->pnum] = e;
} }
list_for_each_entry(seb, &si->corr, u.list) {
cond_resched();
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
if (!e)
goto out_free;
e->pnum = seb->pnum;
e->ec = seb->ec;
ubi->lookuptbl[e->pnum] = e;
if (schedule_erase(ubi, e, 0)) {
kmem_cache_free(ubi_wl_entry_slab, e);
goto out_free;
}
}
ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
cond_resched(); cond_resched();
...@@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ...@@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
if (ubi->avail_pebs < WL_RESERVED_PEBS) { if (ubi->avail_pebs < WL_RESERVED_PEBS) {
ubi_err("no enough physical eraseblocks (%d, need %d)", ubi_err("no enough physical eraseblocks (%d, need %d)",
ubi->avail_pebs, WL_RESERVED_PEBS); ubi->avail_pebs, WL_RESERVED_PEBS);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
goto out_free; goto out_free;
} }
ubi->avail_pebs -= WL_RESERVED_PEBS; ubi->avail_pebs -= WL_RESERVED_PEBS;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment