Commit eb08d8ff authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

* 'linux-next' of git://git.infradead.org/ubifs-2.6: (52 commits)
  UBIFS: switch to dynamic printks
  UBIFS: fix kernel-doc comments
  UBIFS: fix extremely rare mount failure
  UBIFS: simplify LEB recovery function further
  UBIFS: always cleanup the recovered LEB
  UBIFS: clean up LEB recovery function
  UBIFS: fix-up free space on mount if flag is set
  UBIFS: add the fixup function
  UBIFS: add a superblock flag for free space fix-up
  UBIFS: share the next_log_lnum helper
  UBIFS: expect corruption only in last journal head LEBs
  UBIFS: synchronize write-buffer before switching to the next bud
  UBIFS: remove BUG statement
  UBIFS: change bud replay function conventions
  UBIFS: substitute the replay tree with a replay list
  UBIFS: simplify replay
  UBIFS: store free and dirty space in the bud replay entry
  UBIFS: remove unnecessary stack variable
  UBIFS: double check that buds are replied in order
  UBIFS: make 2 functions static
  ...
parents 9f22aae0 56e46742
......@@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
Module Parameters for Debugging
===============================
When UBIFS has been compiled with debugging enabled, there are 3 module
When UBIFS has been compiled with debugging enabled, there are 2 module
parameters that are available to control aspects of testing and debugging.
The parameters are unsigned integers where each bit controls an option.
The parameters are:
debug_msgs Selects which debug messages to display, as follows:
Message Type Flag value
General messages 1
Journal messages 2
Mount messages 4
Commit messages 8
LEB search messages 16
Budgeting messages 32
Garbage collection messages 64
Tree Node Cache (TNC) messages 128
LEB properties (lprops) messages 256
Input/output messages 512
Log messages 1024
Scan messages 2048
Recovery messages 4096
debug_chks Selects extra checks that UBIFS can do while running:
......@@ -154,11 +134,9 @@ debug_tsts Selects a mode of testing, as follows:
Test mode Flag value
Force in-the-gaps method 2
Failure mode for recovery testing 4
For example, set debug_msgs to 5 to display General messages and Mount
messages.
For example, set debug_chks to 3 to enable general and TNC checks.
References
......
......@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
long long liab;
spin_lock(&c->space_lock);
liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
spin_unlock(&c->space_lock);
return liab;
}
......@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
int idx_lebs;
long long idx_size;
idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
/* And make sure we have thrice the index size of space reserved */
idx_size += idx_size << 1;
/*
......@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
* budgeted index space to the size of the current index, multiplies this by 3,
* and makes sure this does not exceed the amount of free LEBs.
*
* Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
* Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
* o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
* be large, because UBIFS does not do any index consolidation as long as
* there is free space. IOW, the index may take a lot of LEBs, but the LEBs
* will contain a lot of dirt.
* o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
* the index may be consolidated to take up to @c->min_idx_lebs LEBs.
* o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
* the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
*
* This function returns zero in case of success, and %-ENOSPC in case of
* failure.
......@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
c->lst.taken_empty_lebs;
if (unlikely(rsvd_idx_lebs > lebs)) {
dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
"rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
"rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
rsvd_idx_lebs);
return -ENOSPC;
}
available = ubifs_calc_available(c, min_idx_lebs);
outstanding = c->budg_data_growth + c->budg_dd_growth;
outstanding = c->bi.data_growth + c->bi.dd_growth;
if (unlikely(available < outstanding)) {
dbg_budg("out of data space: available %lld, outstanding %lld",
......@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
if (available - outstanding <= c->rp_size && !can_use_rp(c))
return -ENOSPC;
c->min_idx_lebs = min_idx_lebs;
c->bi.min_idx_lebs = min_idx_lebs;
return 0;
}
......@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
{
int data_growth;
data_growth = req->new_ino ? c->inode_budget : 0;
data_growth = req->new_ino ? c->bi.inode_budget : 0;
if (req->new_page)
data_growth += c->page_budget;
data_growth += c->bi.page_budget;
if (req->new_dent)
data_growth += c->dent_budget;
data_growth += c->bi.dent_budget;
data_growth += req->new_ino_d;
return data_growth;
}
......@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
{
int dd_growth;
dd_growth = req->dirtied_page ? c->page_budget : 0;
dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
if (req->dirtied_ino)
dd_growth += c->inode_budget << (req->dirtied_ino - 1);
dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
if (req->mod_dent)
dd_growth += c->dent_budget;
dd_growth += c->bi.dent_budget;
dd_growth += req->dirtied_ino_d;
return dd_growth;
}
......@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
again:
spin_lock(&c->space_lock);
ubifs_assert(c->budg_idx_growth >= 0);
ubifs_assert(c->budg_data_growth >= 0);
ubifs_assert(c->budg_dd_growth >= 0);
ubifs_assert(c->bi.idx_growth >= 0);
ubifs_assert(c->bi.data_growth >= 0);
ubifs_assert(c->bi.dd_growth >= 0);
if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
dbg_budg("no space");
spin_unlock(&c->space_lock);
return -ENOSPC;
}
c->budg_idx_growth += idx_growth;
c->budg_data_growth += data_growth;
c->budg_dd_growth += dd_growth;
c->bi.idx_growth += idx_growth;
c->bi.data_growth += data_growth;
c->bi.dd_growth += dd_growth;
err = do_budget_space(c);
if (likely(!err)) {
......@@ -484,9 +484,9 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
}
/* Restore the old values */
c->budg_idx_growth -= idx_growth;
c->budg_data_growth -= data_growth;
c->budg_dd_growth -= dd_growth;
c->bi.idx_growth -= idx_growth;
c->bi.data_growth -= data_growth;
c->bi.dd_growth -= dd_growth;
spin_unlock(&c->space_lock);
if (req->fast) {
......@@ -506,9 +506,9 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
goto again;
}
dbg_budg("FS is full, -ENOSPC");
c->nospace = 1;
c->bi.nospace = 1;
if (can_use_rp(c) || c->rp_size == 0)
c->nospace_rp = 1;
c->bi.nospace_rp = 1;
smp_wmb();
} else
ubifs_err("cannot budget space, error %d", err);
......@@ -523,8 +523,8 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
* This function releases the space budgeted by 'ubifs_budget_space()'. Note,
* since the index changes (which were budgeted for in @req->idx_growth) will
* only be written to the media on commit, this function moves the index budget
* from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
* zeroed by the commit operation.
* from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
* by the commit operation.
*/
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
......@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
if (!req->data_growth && !req->dd_growth)
return;
c->nospace = c->nospace_rp = 0;
c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
spin_lock(&c->space_lock);
c->budg_idx_growth -= req->idx_growth;
c->budg_uncommitted_idx += req->idx_growth;
c->budg_data_growth -= req->data_growth;
c->budg_dd_growth -= req->dd_growth;
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
ubifs_assert(c->budg_idx_growth >= 0);
ubifs_assert(c->budg_data_growth >= 0);
ubifs_assert(c->budg_dd_growth >= 0);
ubifs_assert(c->min_idx_lebs < c->main_lebs);
ubifs_assert(!(c->budg_idx_growth & 7));
ubifs_assert(!(c->budg_data_growth & 7));
ubifs_assert(!(c->budg_dd_growth & 7));
c->bi.idx_growth -= req->idx_growth;
c->bi.uncommitted_idx += req->idx_growth;
c->bi.data_growth -= req->data_growth;
c->bi.dd_growth -= req->dd_growth;
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
ubifs_assert(c->bi.idx_growth >= 0);
ubifs_assert(c->bi.data_growth >= 0);
ubifs_assert(c->bi.dd_growth >= 0);
ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
ubifs_assert(!(c->bi.idx_growth & 7));
ubifs_assert(!(c->bi.data_growth & 7));
ubifs_assert(!(c->bi.dd_growth & 7));
spin_unlock(&c->space_lock);
}
......@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
{
spin_lock(&c->space_lock);
/* Release the index growth reservation */
c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
/* Release the data growth reservation */
c->budg_data_growth -= c->page_budget;
c->bi.data_growth -= c->bi.page_budget;
/* Increase the dirty data growth reservation instead */
c->budg_dd_growth += c->page_budget;
c->bi.dd_growth += c->bi.page_budget;
/* And re-calculate the indexing space reservation */
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
}
......@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
memset(&req, 0, sizeof(struct ubifs_budget_req));
/* The "no space" flags will be cleared because dd_growth is > 0 */
req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
ubifs_release_budget(c, &req);
}
......@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
int rsvd_idx_lebs, lebs;
long long available, outstanding, free;
ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
outstanding = c->budg_data_growth + c->budg_dd_growth;
available = ubifs_calc_available(c, c->min_idx_lebs);
ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
outstanding = c->bi.data_growth + c->bi.dd_growth;
available = ubifs_calc_available(c, c->bi.min_idx_lebs);
/*
* When reporting free space to user-space, UBIFS guarantees that it is
......@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
* Note, the calculations below are similar to what we have in
* 'do_budget_space()', so refer there for comments.
*/
if (c->min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
if (c->bi.min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
......
......@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
c->mst_node->root_len = cpu_to_le32(zroot.len);
c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
......
......@@ -34,7 +34,6 @@
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/math64.h>
#include <linux/slab.h>
#ifdef CONFIG_UBIFS_FS_DEBUG
......@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
static char dbg_key_buf0[128];
static char dbg_key_buf1[128];
unsigned int ubifs_msg_flags;
unsigned int ubifs_chk_flags;
unsigned int ubifs_tst_flags;
module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
MODULE_PARM_DESC(debug_chks, "Debug check flags");
MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
......@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
printk(KERN_DEBUG "\t big_lpt %u\n",
!!(sup_flags & UBIFS_FLG_BIGLPT));
printk(KERN_DEBUG "\t space_fixup %u\n",
!!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
printk(KERN_DEBUG "\tmin_io_size %u\n",
le32_to_cpu(sup->min_io_size));
printk(KERN_DEBUG "\tleb_size %u\n",
......@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
spin_unlock(&dbg_lock);
}
void dbg_dump_budg(struct ubifs_info *c)
void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
{
int i;
struct rb_node *rb;
......@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
struct ubifs_gced_idx_leb *idx_gc;
long long available, outstanding, free;
ubifs_assert(spin_is_locked(&c->space_lock));
spin_lock(&c->space_lock);
spin_lock(&dbg_lock);
printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
"budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
"freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
c->freeable_cnt);
printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
"calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
"total budget sum %lld\n", current->pid,
bi->data_growth + bi->dd_growth,
bi->data_growth + bi->dd_growth + bi->idx_growth);
printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
"budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
bi->idx_growth);
printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
"uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
bi->uncommitted_idx);
printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
bi->page_budget, bi->inode_budget, bi->dent_budget);
printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
bi->nospace, bi->nospace_rp);
printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
c->dark_wm, c->dead_wm, c->max_idx_node_sz);
if (bi != &c->bi)
/*
* If we are dumping saved budgeting data, do not print
* additional information which is about the current state, not
* the old one which corresponded to the saved budgeting data.
*/
goto out_unlock;
printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
"clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
atomic_long_read(&c->dirty_zn_cnt),
atomic_long_read(&c->clean_zn_cnt));
printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
c->dark_wm, c->dead_wm, c->max_idx_node_sz);
printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
c->gc_lnum, c->ihead_lnum);
/* If we are in R/O mode, journal heads do not exist */
if (c->jheads)
for (i = 0; i < c->jhead_cnt; i++)
......@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
/* Print budgeting predictions */
available = ubifs_calc_available(c, c->min_idx_lebs);
outstanding = c->budg_data_growth + c->budg_dd_growth;
available = ubifs_calc_available(c, c->bi.min_idx_lebs);
outstanding = c->bi.data_growth + c->bi.dd_growth;
free = ubifs_get_free_space_nolock(c);
printk(KERN_DEBUG "Budgeting predictions:\n");
printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
available, outstanding, free);
out_unlock:
spin_unlock(&dbg_lock);
spin_unlock(&c->space_lock);
}
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
......@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
if (bud->lnum == lp->lnum) {
int head = 0;
for (i = 0; i < c->jhead_cnt; i++) {
if (lp->lnum == c->jheads[i].wbuf.lnum) {
/*
* Note, if we are in R/O mode or in the middle
* of mounting/re-mounting, the write-buffers do
* not exist.
*/
if (c->jheads &&
lp->lnum == c->jheads[i].wbuf.lnum) {
printk(KERN_CONT ", jhead %s",
dbg_jhead(i));
head = 1;
......@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
spin_lock(&c->space_lock);
memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
d->saved_idx_gc_cnt = c->idx_gc_cnt;
/*
* We use a dirty hack here and zero out @c->freeable_cnt, because it
......@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
out:
ubifs_msg("saved lprops statistics dump");
dbg_dump_lstats(&d->saved_lst);
ubifs_get_lp_stats(c, &lst);
ubifs_msg("saved budgeting info dump");
dbg_dump_budg(c, &d->saved_bi);
ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
ubifs_msg("current lprops statistics dump");
ubifs_get_lp_stats(c, &lst);
dbg_dump_lstats(&lst);
spin_lock(&c->space_lock);
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
ubifs_msg("current budgeting info dump");
dbg_dump_budg(c, &c->bi);
dump_stack();
return -EINVAL;
}
......@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
struct rb_node **p, *parent = NULL;
struct fsck_inode *fscki;
ino_t inum = key_inum_flash(c, &ino->key);
struct inode *inode;
struct ubifs_inode *ui;
p = &fsckd->inodes.rb_node;
while (*p) {
......@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
if (!fscki)
return ERR_PTR(-ENOMEM);
inode = ilookup(c->vfs_sb, inum);
fscki->inum = inum;
fscki->nlink = le32_to_cpu(ino->nlink);
fscki->size = le64_to_cpu(ino->size);
fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
fscki->mode = le32_to_cpu(ino->mode);
/*
* If the inode is present in the VFS inode cache, use it instead of
* the on-flash inode which might be out-of-date. E.g., the size might
* be out-of-date. If we do not do this, the following may happen, for
* example:
* 1. A power cut happens
* 2. We mount the file-system R/O, the replay process fixes up the
* inode size in the VFS cache, but on on-flash.
* 3. 'check_leaf()' fails because it hits a data node beyond inode
* size.
*/
if (!inode) {
fscki->nlink = le32_to_cpu(ino->nlink);
fscki->size = le64_to_cpu(ino->size);
fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
fscki->mode = le32_to_cpu(ino->mode);
} else {
ui = ubifs_inode(inode);
fscki->nlink = inode->i_nlink;
fscki->size = inode->i_size;
fscki->xattr_cnt = ui->xattr_cnt;
fscki->xattr_sz = ui->xattr_size;
fscki->xattr_nms = ui->xattr_names;
fscki->mode = inode->i_mode;
iput(inode);
}
if (S_ISDIR(fscki->mode)) {
fscki->calc_sz = UBIFS_INO_NODE_SZ;
fscki->calc_cnt = 2;
}
rb_link_node(&fscki->rb, parent, p);
rb_insert_color(&fscki->rb, &fsckd->inodes);
return fscki;
}
......@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
hashb = key_block(c, &sb->key);
if (hasha > hashb) {
ubifs_err("larger hash %u goes before %u", hasha, hashb);
ubifs_err("larger hash %u goes before %u",
hasha, hashb);
goto error_dump;
}
}
......@@ -2437,14 +2491,12 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
return 0;
}
static int invocation_cnt;
int dbg_force_in_the_gaps(void)
{
if (!dbg_force_in_the_gaps_enabled)
if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
return 0;
/* Force in-the-gaps every 8th commit */
return !((invocation_cnt++) & 0x7);
return !(random32() & 7);
}
/* Failure mode for recovery testing */
......@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
int len, int check)
{
if (in_failure_mode(desc))
return -EIO;
return -EROFS;
return ubi_leb_read(desc, lnum, buf, offset, len, check);
}
......@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
int err, failing;
if (in_failure_mode(desc))
return -EIO;
return -EROFS;
failing = do_fail(desc, lnum, 1);
if (failing)
cut_data(buf, len);
......@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
if (err)
return err;
if (failing)
return -EIO;
return -EROFS;
return 0;
}
......@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
int err;
if (do_fail(desc, lnum, 1))
return -EIO;
return -EROFS;
err = ubi_leb_change(desc, lnum, buf, len, dtype);
if (err)
return err;
if (do_fail(desc, lnum, 1))
return -EIO;
return -EROFS;
return 0;
}
......@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
int err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
err = ubi_leb_erase(desc, lnum);
if (err)
return err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
return 0;
}
......@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
int err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
err = ubi_leb_unmap(desc, lnum);
if (err)
return err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
return 0;
}
int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
{
if (in_failure_mode(desc))
return -EIO;
return -EROFS;
return ubi_is_mapped(desc, lnum);
}
......@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
int err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
err = ubi_leb_map(desc, lnum, dtype);
if (err)
return err;
if (do_fail(desc, lnum, 0))
return -EIO;
return -EROFS;
return 0;
}
......@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
static int open_debugfs_file(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
return nonseekable_open(inode, file);
}
static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
......@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
if (file->f_path.dentry == d->dfs_dump_lprops)
dbg_dump_lprops(c);
else if (file->f_path.dentry == d->dfs_dump_budg) {
spin_lock(&c->space_lock);
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
} else if (file->f_path.dentry == d->dfs_dump_tnc) {
else if (file->f_path.dentry == d->dfs_dump_budg)
dbg_dump_budg(c, &c->bi);
else if (file->f_path.dentry == d->dfs_dump_tnc) {
mutex_lock(&c->tnc_mutex);
dbg_dump_tnc(c);
mutex_unlock(&c->tnc_mutex);
} else
return -EINVAL;
*ppos += count;
return count;
}
......@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
.open = open_debugfs_file,
.write = write_debugfs_file,
.owner = THIS_MODULE,
.llseek = default_llseek,
.llseek = no_llseek,
};
/**
......
......@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
#ifdef CONFIG_UBIFS_FS_DEBUG
#include <linux/random.h>
/**
* ubifs_debug_info - per-FS debugging information.
* @old_zroot: old index root - used by 'dbg_check_old_index()'
......@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
* @new_ihead_offs: used by debugging to check @c->ihead_offs
*
* @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
* @saved_free: saved free space (used by 'dbg_save_space_info()')
* @saved_bi: saved budgeting information
* @saved_free: saved amount of free space
* @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
*
* dfs_dir_name: name of debugfs directory containing this file-system's files
* dfs_dir: direntry object of the file-system debugfs directory
* dfs_dump_lprops: "dump lprops" debugfs knob
* dfs_dump_budg: "dump budgeting information" debugfs knob
* dfs_dump_tnc: "dump TNC" debugfs knob
* @dfs_dir_name: name of debugfs directory containing this file-system's files
* @dfs_dir: direntry object of the file-system debugfs directory
* @dfs_dump_lprops: "dump lprops" debugfs knob
* @dfs_dump_budg: "dump budgeting information" debugfs knob
* @dfs_dump_tnc: "dump TNC" debugfs knob
*/
struct ubifs_debug_info {
struct ubifs_zbranch old_zroot;
......@@ -76,7 +80,9 @@ struct ubifs_debug_info {
int new_ihead_offs;
struct ubifs_lp_stats saved_lst;
struct ubifs_budg_info saved_bi;
long long saved_free;
int saved_idx_gc_cnt;
char dfs_dir_name[100];
struct dentry *dfs_dir;
......@@ -101,23 +107,7 @@ struct ubifs_debug_info {
} \
} while (0)
#define dbg_dump_stack() do { \
if (!dbg_failure_mode) \
dump_stack(); \
} while (0)
/* Generic debugging messages */
#define dbg_msg(fmt, ...) do { \
spin_lock(&dbg_lock); \
printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
__func__, ##__VA_ARGS__); \
spin_unlock(&dbg_lock); \
} while (0)
#define dbg_do_msg(typ, fmt, ...) do { \
if (ubifs_msg_flags & typ) \
dbg_msg(fmt, ##__VA_ARGS__); \
} while (0)
#define dbg_dump_stack() dump_stack()
#define dbg_err(fmt, ...) do { \
spin_lock(&dbg_lock); \
......@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
#define DBGKEY(key) dbg_key_str0(c, (key))
#define DBGKEY1(key) dbg_key_str1(c, (key))
/* General messages */
#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
#define ubifs_dbg_msg(type, fmt, ...) do { \
spin_lock(&dbg_lock); \
pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
spin_unlock(&dbg_lock); \
} while (0)
/* Just a debugging messages not related to any specific UBIFS subsystem */
#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
/* General messages */
#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
/* Additional journal messages */
#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
/* Additional TNC messages */
#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
/* Additional lprops messages */
#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
/* Additional LEB find messages */
#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
/* Additional mount messages */
#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
/* Additional I/O messages */
#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
/* Additional commit messages */
#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
/* Additional budgeting messages */
#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
/* Additional log messages */
#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
/* Additional gc messages */
#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
/* Additional scan messages */
#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
/* Additional recovery messages */
#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
/*
* Debugging message type flags.
*
* UBIFS_MSG_GEN: general messages
* UBIFS_MSG_JNL: journal messages
* UBIFS_MSG_MNT: mount messages
* UBIFS_MSG_CMT: commit messages
* UBIFS_MSG_FIND: LEB find messages
* UBIFS_MSG_BUDG: budgeting messages
* UBIFS_MSG_GC: garbage collection messages
* UBIFS_MSG_TNC: TNC messages
* UBIFS_MSG_LP: lprops messages
* UBIFS_MSG_IO: I/O messages
* UBIFS_MSG_LOG: log messages
* UBIFS_MSG_SCAN: scan messages
* UBIFS_MSG_RCVRY: recovery messages
*/
enum {
UBIFS_MSG_GEN = 0x1,
UBIFS_MSG_JNL = 0x2,
UBIFS_MSG_MNT = 0x4,
UBIFS_MSG_CMT = 0x8,
UBIFS_MSG_FIND = 0x10,
UBIFS_MSG_BUDG = 0x20,
UBIFS_MSG_GC = 0x40,
UBIFS_MSG_TNC = 0x80,
UBIFS_MSG_LP = 0x100,
UBIFS_MSG_IO = 0x200,
UBIFS_MSG_LOG = 0x400,
UBIFS_MSG_SCAN = 0x800,
UBIFS_MSG_RCVRY = 0x1000,
};
#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
/*
* Debugging check flags.
......@@ -233,11 +186,9 @@ enum {
/*
* Special testing flags.
*
* UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
* UBIFS_TST_RCVRY: failure mode for recovery testing
*/
enum {
UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
UBIFS_TST_RCVRY = 0x4,
};
......@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
int offs);
void dbg_dump_budget_req(const struct ubifs_budget_req *req);
void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
void dbg_dump_budg(struct ubifs_info *c);
void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
void dbg_dump_lprops(struct ubifs_info *c);
void dbg_dump_lpt_info(struct ubifs_info *c);
......@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
/* Force the use of in-the-gaps method for testing */
#define dbg_force_in_the_gaps_enabled \
(ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
static inline int dbg_force_in_the_gaps_enabled(void)
{
return ubifs_chk_flags & UBIFS_CHK_GEN;
}
int dbg_force_in_the_gaps(void);
/* Failure mode for recovery testing */
#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
#ifndef UBIFS_DBG_PRESERVE_UBI
#define ubi_leb_read dbg_leb_read
#define ubi_leb_write dbg_leb_write
#define ubi_leb_change dbg_leb_change
......@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
#define ubi_leb_unmap dbg_leb_unmap
#define ubi_is_mapped dbg_is_mapped
#define ubi_leb_map dbg_leb_map
#endif
int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
......@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
__func__, __LINE__, current->pid); \
} while (0)
#define dbg_err(fmt, ...) do { \
if (0) \
ubifs_err(fmt, ##__VA_ARGS__); \
#define dbg_err(fmt, ...) do { \
if (0) \
ubifs_err(fmt, ##__VA_ARGS__); \
} while (0)
#define dbg_msg(fmt, ...) do { \
if (0) \
printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
current->pid, __func__, ##__VA_ARGS__); \
#define ubifs_dbg_msg(fmt, ...) do { \
if (0) \
pr_debug(fmt "\n", ##__VA_ARGS__); \
} while (0)
#define dbg_dump_stack()
#define ubifs_assert_cmt_locked(c)
#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
#define DBGKEY(key) ((char *)(key))
#define DBGKEY1(key) ((char *)(key))
......@@ -420,7 +368,9 @@ static inline void
dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
static inline void
dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
static inline void dbg_dump_budg(struct ubifs_info *c) { return; }
static inline void
dbg_dump_budg(struct ubifs_info *c,
const struct ubifs_budg_info *bi) { return; }
static inline void dbg_dump_lprop(const struct ubifs_info *c,
const struct ubifs_lprops *lp) { return; }
static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
......@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
struct list_head *head) { return 0; }
static inline int dbg_force_in_the_gaps(void) { return 0; }
#define dbg_force_in_the_gaps_enabled 0
#define dbg_failure_mode 0
#define dbg_force_in_the_gaps_enabled() 0
#define dbg_failure_mode 0
static inline int dbg_debugfs_init(void) { return 0; }
static inline void dbg_debugfs_exit(void) { return; }
......
......@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
c->nospace = c->nospace_rp = 0;
c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
......@@ -693,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
c->nospace = c->nospace_rp = 0;
c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
......
......@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
*/
static void release_existing_page_budget(struct ubifs_info *c)
{
struct ubifs_budget_req req = { .dd_growth = c->page_budget};
struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
ubifs_release_budget(c, &req);
}
......@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
* we have to call 'truncate_setsize()', which first changes @inode->i_size, then
* drops the truncated pages. And while dropping the pages, it takes the page
* lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
* @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
* means that @inode->i_size is changed while @ui_mutex is unlocked.
* we have to call 'truncate_setsize()', which first changes @inode->i_size,
* then drops the truncated pages. And while dropping the pages, it takes the
* page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
* with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
* This means that @inode->i_size is changed while @ui_mutex is unlocked.
*
* XXX(truncate): with the new truncate sequence this is not true anymore,
* and the calls to truncate_setsize can be move around freely. They should
......@@ -1189,7 +1189,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
if (budgeted)
ubifs_release_budget(c, &req);
else {
c->nospace = c->nospace_rp = 0;
c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return err;
......@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
dbg_gen("syncing inode %lu", inode->i_ino);
if (inode->i_sb->s_flags & MS_RDONLY)
if (c->ro_mount)
/*
* For some really strange reasons VFS does not filter out
* 'fsync()' for R/O mounted file-systems as per 2.6.39.
*/
return 0;
/*
......@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
}
/*
* mmap()d file has taken write protection fault and is being made
* writable. UBIFS must ensure page is budgeted for.
* mmap()d file has taken write protection fault and is being made writable.
* UBIFS must ensure page is budgeted for.
*/
static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
......@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
int err;
/* 'generic_file_mmap()' takes care of NOMMU case */
err = generic_file_mmap(file, vma);
if (err)
return err;
......
......@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
* But if the index takes fewer LEBs than it is reserved for it,
* this function must avoid picking those reserved LEBs.
*/
if (c->min_idx_lebs >= c->lst.idx_lebs) {
rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
exclude_index = 1;
}
spin_unlock(&c->space_lock);
......@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
pick_free = 0;
} else {
spin_lock(&c->space_lock);
exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
spin_unlock(&c->space_lock);
}
......@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
/* Check if there are enough empty LEBs for commit */
spin_lock(&c->space_lock);
if (c->min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
if (c->bi.min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
......
......@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
if (err)
return err;
err = ubifs_wbuf_sync_nolock(wbuf);
if (err)
return err;
err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
if (err)
return err;
......@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
* This function compares data nodes @a and @b. Returns %1 if @a has greater
* inode or block number, and %-1 otherwise.
*/
int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
ino_t inuma, inumb;
struct ubifs_info *c = priv;
......@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
* first and sorted by length in descending order. Directory entry nodes go
* after inode nodes and are sorted in ascending hash valuer order.
*/
int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
static int nondata_nodes_cmp(void *priv, struct list_head *a,
struct list_head *b)
{
ino_t inuma, inumb;
struct ubifs_info *c = priv;
......@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
ubifs_assert(c->gc_lnum != lnum);
ubifs_assert(wbuf->lnum != lnum);
if (lp->free + lp->dirty == c->leb_size) {
/* Special case - a free LEB */
dbg_gc("LEB %d is free, return it", lp->lnum);
ubifs_assert(!(lp->flags & LPROPS_INDEX));
if (lp->free != c->leb_size) {
/*
* Write buffers must be sync'd before unmapping
* freeable LEBs, because one of them may contain data
* which obsoletes something in 'lp->pnum'.
*/
err = gc_sync_wbufs(c);
if (err)
return err;
err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
0, 0, 0, 0);
if (err)
return err;
}
err = ubifs_leb_unmap(c, lp->lnum);
if (err)
return err;
if (c->gc_lnum == -1) {
c->gc_lnum = lnum;
return LEB_RETAINED;
}
return LEB_FREED;
}
/*
* We scan the entire LEB even though we only really need to scan up to
* (c->leb_size - lp->free).
......@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
"(min. space %d)", lp.lnum, lp.free, lp.dirty,
lp.free + lp.dirty, min_space);
if (lp.free + lp.dirty == c->leb_size) {
/* An empty LEB was returned */
dbg_gc("LEB %d is free, return it", lp.lnum);
/*
* ubifs_find_dirty_leb() doesn't return freeable index
* LEBs.
*/
ubifs_assert(!(lp.flags & LPROPS_INDEX));
if (lp.free != c->leb_size) {
/*
* Write buffers must be sync'd before
* unmapping freeable LEBs, because one of them
* may contain data which obsoletes something
* in 'lp.pnum'.
*/
ret = gc_sync_wbufs(c);
if (ret)
goto out;
ret = ubifs_change_one_lp(c, lp.lnum,
c->leb_size, 0, 0, 0,
0);
if (ret)
goto out;
}
ret = ubifs_leb_unmap(c, lp.lnum);
if (ret)
goto out;
ret = lp.lnum;
break;
}
space_before = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum == -1)
space_before = 0;
......
......@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
ubifs_assert(wbuf->size % c->min_io_size == 0);
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->ro_error)
return -EROFS;
......@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
* @dtype: data type
*
* This function targets the write-buffer to logical eraseblock @lnum:@offs.
* The write-buffer is synchronized if it is not empty. Returns zero in case of
* success and a negative error code in case of failure.
* The write-buffer has to be empty. Returns zero in case of success and a
* negative error code in case of failure.
*/
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
int dtype)
......@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
ubifs_assert(offs >= 0 && offs <= c->leb_size);
ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
ubifs_assert(lnum != wbuf->lnum);
if (wbuf->used > 0) {
int err = ubifs_wbuf_sync_nolock(wbuf);
if (err)
return err;
}
ubifs_assert(wbuf->used == 0);
spin_lock(&wbuf->lock);
wbuf->lnum = lnum;
......@@ -573,7 +567,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
struct ubifs_info *c = wbuf->c;
int err, written, n, aligned_len = ALIGN(len, 8), offs;
int err, written, n, aligned_len = ALIGN(len, 8);
dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
dbg_ntype(((struct ubifs_ch *)buf)->node_type),
......@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC;
......@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
goto exit;
}
offs = wbuf->offs;
written = 0;
if (wbuf->used) {
......@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (err)
goto out;
offs += wbuf->size;
wbuf->offs += wbuf->size;
len -= wbuf->avail;
aligned_len -= wbuf->avail;
written += wbuf->avail;
......@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (err)
goto out;
offs += wbuf->size;
wbuf->offs += wbuf->size;
len -= wbuf->size;
aligned_len -= wbuf->size;
written += wbuf->size;
......@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
n = aligned_len >> c->max_write_shift;
if (n) {
n <<= c->max_write_shift;
dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
wbuf->dtype);
dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
wbuf->offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
wbuf->offs, n, wbuf->dtype);
if (err)
goto out;
offs += n;
wbuf->offs += n;
aligned_len -= n;
len -= n;
written += n;
......@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
*/
memcpy(wbuf->buf, buf + written, len);
wbuf->offs = offs;
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
else
......
......@@ -141,14 +141,8 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* LEB with some empty space.
*/
lnum = ubifs_find_free_space(c, len, &offs, squeeze);
if (lnum >= 0) {
/* Found an LEB, add it to the journal head */
err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
if (err)
goto out_return;
/* A new bud was successfully allocated and added to the log */
if (lnum >= 0)
goto out;
}
err = lnum;
if (err != -ENOSPC)
......@@ -203,12 +197,23 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
return 0;
}
err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
if (err)
goto out_return;
offs = 0;
out:
/*
* Make sure we synchronize the write-buffer before we add the new bud
* to the log. Otherwise we may have a power cut after the log
* reference node for the last bud (@lnum) is written but before the
* write-buffer data are written to the next-to-last bud
* (@wbuf->lnum). And the effect would be that the recovery would see
* that there is corruption in the next-to-last bud.
*/
err = ubifs_wbuf_sync_nolock(wbuf);
if (err)
goto out_return;
err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
if (err)
goto out_return;
err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
if (err)
goto out_unlock;
......@@ -380,10 +385,8 @@ static int make_reservation(struct ubifs_info *c, int jhead, int len)
if (err == -ENOSPC) {
/* This are some budgeting problems, print useful information */
down_write(&c->commit_sem);
spin_lock(&c->space_lock);
dbg_dump_stack();
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
......
......@@ -99,20 +99,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
return NULL;
}
/**
* next_log_lnum - switch to the next log LEB.
* @c: UBIFS file-system description object
* @lnum: current log LEB
*/
static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
{
lnum += 1;
if (lnum > c->log_last)
lnum = UBIFS_LOG_LNUM;
return lnum;
}
/**
* empty_log_bytes - calculate amount of empty space in the log.
* @c: UBIFS file-system description object
......@@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
ref->jhead = cpu_to_le32(jhead);
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
......@@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
/* Switch to the next log LEB */
if (c->lhead_offs) {
c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
......@@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
c->lhead_offs += len;
if (c->lhead_offs == c->leb_size) {
c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
......@@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
}
mutex_lock(&c->log_mutex);
for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
lnum = next_log_lnum(c, lnum)) {
lnum = ubifs_next_log_lnum(c, lnum)) {
dbg_log("unmap log LEB %d", lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
......@@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
if (err)
return err;
*lnum = next_log_lnum(c, *lnum);
*lnum = ubifs_next_log_lnum(c, *lnum);
*offs = 0;
}
memcpy(buf + *offs, node, len);
......@@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
ubifs_scan_destroy(sleb);
if (lnum == c->lhead_lnum)
break;
lnum = next_log_lnum(c, lnum);
lnum = ubifs_next_log_lnum(c, lnum);
}
if (offs) {
int sz = ALIGN(offs, c->min_io_size);
......@@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
/* Unmap remaining LEBs */
lnum = write_lnum;
do {
lnum = next_log_lnum(c, lnum);
lnum = ubifs_next_log_lnum(c, lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
return err;
......
......@@ -1006,22 +1006,12 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
}
}
/**
* struct scan_check_data - data provided to scan callback function.
* @lst: LEB properties statistics
* @err: error code
*/
struct scan_check_data {
struct ubifs_lp_stats lst;
int err;
};
/**
* scan_check_cb - scan callback.
* @c: the UBIFS file-system description object
* @lp: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
* @data: information passed to and from the caller of the scan
* @lst: lprops statistics to update
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
......@@ -1030,11 +1020,10 @@ struct scan_check_data {
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
struct scan_check_data *data)
struct ubifs_lp_stats *lst)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
struct ubifs_lp_stats *lst = &data->lst;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
void *buf = NULL;
......@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (cat != (lp->flags & LPROPS_CAT_MASK)) {
ubifs_err("bad LEB category %d expected %d",
(lp->flags & LPROPS_CAT_MASK), cat);
goto out;
return -EINVAL;
}
}
......@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
}
if (!found) {
ubifs_err("bad LPT list (category %d)", cat);
goto out;
return -EINVAL;
}
}
}
......@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
lp != heap->arr[lp->hpos]) {
ubifs_err("bad LPT heap (category %d)", cat);
goto out;
return -EINVAL;
}
}
buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
if (!buf) {
ubifs_err("cannot allocate memory to scan LEB %d", lnum);
goto out;
if (!buf)
return -ENOMEM;
/*
* After an unclean unmount, empty and freeable LEBs
* may contain garbage - do not scan them.
*/
if (lp->free == c->leb_size) {
lst->empty_lebs += 1;
lst->total_free += c->leb_size;
lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE;
}
if (lp->free + lp->dirty == c->leb_size &&
!(lp->flags & LPROPS_INDEX)) {
lst->total_free += lp->free;
lst->total_dirty += lp->dirty;
lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE;
}
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
/*
* After an unclean unmount, empty and freeable LEBs
* may contain garbage.
*/
if (lp->free == c->leb_size) {
ubifs_err("scan errors were in empty LEB "
"- continuing checking");
lst->empty_lebs += 1;
lst->total_free += c->leb_size;
lst->total_dark += ubifs_calc_dark(c, c->leb_size);
ret = LPT_SCAN_CONTINUE;
goto exit;
}
if (lp->free + lp->dirty == c->leb_size &&
!(lp->flags & LPROPS_INDEX)) {
ubifs_err("scan errors were in freeable LEB "
"- continuing checking");
lst->total_free += lp->free;
lst->total_dirty += lp->dirty;
lst->total_dark += ubifs_calc_dark(c, c->leb_size);
ret = LPT_SCAN_CONTINUE;
goto exit;
ret = PTR_ERR(sleb);
if (ret == -EUCLEAN) {
dbg_dump_lprops(c);
dbg_dump_budg(c, &c->bi);
}
data->err = PTR_ERR(sleb);
ret = LPT_SCAN_STOP;
goto exit;
goto out;
}
is_idx = -1;
......@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
}
ubifs_scan_destroy(sleb);
ret = LPT_SCAN_CONTINUE;
exit:
vfree(buf);
return ret;
return LPT_SCAN_CONTINUE;
out_print:
ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
......@@ -1258,10 +1240,10 @@ static int scan_check_cb(struct ubifs_info *c,
dbg_dump_leb(c, lnum);
out_destroy:
ubifs_scan_destroy(sleb);
ret = -EINVAL;
out:
vfree(buf);
data->err = -EINVAL;
return LPT_SCAN_STOP;
return ret;
}
/**
......@@ -1278,8 +1260,7 @@ static int scan_check_cb(struct ubifs_info *c,
int dbg_check_lprops(struct ubifs_info *c)
{
int i, err;
struct scan_check_data data;
struct ubifs_lp_stats *lst = &data.lst;
struct ubifs_lp_stats lst;
if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
return 0;
......@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
return err;
}
memset(lst, 0, sizeof(struct ubifs_lp_stats));
data.err = 0;
memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
(ubifs_lpt_scan_callback)scan_check_cb,
&data);
&lst);
if (err && err != -ENOSPC)
goto out;
if (data.err) {
err = data.err;
goto out;
}
if (lst->empty_lebs != c->lst.empty_lebs ||
lst->idx_lebs != c->lst.idx_lebs ||
lst->total_free != c->lst.total_free ||
lst->total_dirty != c->lst.total_dirty ||
lst->total_used != c->lst.total_used) {
if (lst.empty_lebs != c->lst.empty_lebs ||
lst.idx_lebs != c->lst.idx_lebs ||
lst.total_free != c->lst.total_free ||
lst.total_dirty != c->lst.total_dirty ||
lst.total_used != c->lst.total_used) {
ubifs_err("bad overall accounting");
ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
"total_free %lld, total_dirty %lld, total_used %lld",
lst->empty_lebs, lst->idx_lebs, lst->total_free,
lst->total_dirty, lst->total_used);
lst.empty_lebs, lst.idx_lebs, lst.total_free,
lst.total_dirty, lst.total_used);
ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
"total_free %lld, total_dirty %lld, total_used %lld",
c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
......@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
goto out;
}
if (lst->total_dead != c->lst.total_dead ||
lst->total_dark != c->lst.total_dark) {
if (lst.total_dead != c->lst.total_dead ||
lst.total_dark != c->lst.total_dark) {
ubifs_err("bad dead/dark space accounting");
ubifs_err("calculated: total_dead %lld, total_dark %lld",
lst->total_dead, lst->total_dark);
lst.total_dead, lst.total_dark);
ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
c->lst.total_dead, c->lst.total_dark);
err = -EINVAL;
......
......@@ -29,6 +29,12 @@
#include <linux/slab.h>
#include "ubifs.h"
#ifdef CONFIG_UBIFS_FS_DEBUG
static int dbg_populate_lsave(struct ubifs_info *c);
#else
#define dbg_populate_lsave(c) 0
#endif
/**
* first_dirty_cnode - find first dirty cnode.
* @c: UBIFS file-system description object
......@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
if (nnode->nbranch[iip].lnum)
break;
}
} while (iip >= UBIFS_LPT_FANOUT);
} while (iip >= UBIFS_LPT_FANOUT);
/* Go right */
nnode = ubifs_get_nnode(c, nnode, iip);
......@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
c->lpt_drty_flgs |= LSAVE_DIRTY;
ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
}
if (dbg_populate_lsave(c))
return;
list_for_each_entry(lprops, &c->empty_list, list) {
c->lsave[cnt++] = lprops->lnum;
if (cnt >= c->lsave_cnt)
......@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
current->pid);
}
/**
* dbg_populate_lsave - debugging version of 'populate_lsave()'
* @c: UBIFS file-system description object
*
* This is a debugging version for 'populate_lsave()' which populates lsave
* with random LEBs instead of useful LEBs, which is good for test coverage.
* Returns zero if lsave has not been populated (this debugging feature is
* disabled) an non-zero if lsave has been populated.
*/
static int dbg_populate_lsave(struct ubifs_info *c)
{
struct ubifs_lprops *lprops;
struct ubifs_lpt_heap *heap;
int i;
if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
return 0;
if (random32() & 3)
return 0;
for (i = 0; i < c->lsave_cnt; i++)
c->lsave[i] = c->main_first;
list_for_each_entry(lprops, &c->empty_list, list)
c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
list_for_each_entry(lprops, &c->freeable_list, list)
c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
list_for_each_entry(lprops, &c->frdi_idx_list, list)
c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
for (i = 0; i < heap->cnt; i++)
c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
for (i = 0; i < heap->cnt; i++)
c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_FREE - 1];
for (i = 0; i < heap->cnt; i++)
c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
return 1;
}
#endif /* CONFIG_UBIFS_FS_DEBUG */
......@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
}
main_sz = (long long)c->main_lebs * c->leb_size;
if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
err = 9;
goto out;
}
......@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
}
if (c->lst.total_dead + c->lst.total_dark +
c->lst.total_used + c->old_idx_sz > main_sz) {
c->lst.total_used + c->bi.old_idx_sz > main_sz) {
err = 21;
goto out;
}
......@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
......@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
c->calc_idx_sz = c->old_idx_sz;
c->calc_idx_sz = c->bi.old_idx_sz;
if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
c->no_orphs = 1;
......
......@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
mutex_unlock(&c->lp_mutex);
}
/**
* ubifs_next_log_lnum - switch to the next log LEB.
* @c: UBIFS file-system description object
* @lnum: current log LEB
*
* This helper function returns the log LEB number which goes next after LEB
* 'lnum'.
*/
static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
{
lnum += 1;
if (lnum > c->log_last)
lnum = UBIFS_LOG_LNUM;
return lnum;
}
#endif /* __UBIFS_MISC_H__ */
......@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb)) {
if (PTR_ERR(sleb) == -EUCLEAN)
sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
sleb = ubifs_recover_leb(c, lnum, 0,
c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
......
......@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
}
/**
* drop_incomplete_group - drop nodes from an incomplete group.
* drop_last_node - drop the last node or group of nodes.
* @sleb: scanned LEB information
* @offs: offset of dropped nodes is returned here
* @grouped: non-zero if whole group of nodes have to be dropped
*
* This function returns %1 if nodes are dropped and %0 otherwise.
* This is a helper function for 'ubifs_recover_leb()' which drops the last
* node of the scanned LEB or the last group of nodes if @grouped is not zero.
* This function returns %1 if a node was dropped and %0 otherwise.
*/
static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
{
int dropped = 0;
......@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
kfree(snod);
sleb->nodes_cnt -= 1;
dropped = 1;
if (!grouped)
break;
}
return dropped;
}
......@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf, int grouped)
{
int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
int empty_chkd = 0, start = offs;
int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
struct ubifs_scan_leb *sleb;
void *buf = sbuf + offs;
......@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
if (IS_ERR(sleb))
return sleb;
if (sleb->ecc)
need_clean = 1;
ubifs_assert(len >= 8);
while (len >= 8) {
int ret;
dbg_scan("look at LEB %d:%d (%d bytes left)",
lnum, offs, len);
......@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
* Scan quietly until there is an error from which we cannot
* recover
*/
ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
if (ret == SCANNED_A_NODE) {
/* A valid node, and not a padding node */
struct ubifs_ch *ch = buf;
......@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
offs += node_len;
buf += node_len;
len -= node_len;
continue;
}
if (ret > 0) {
} else if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
buf += ret;
len -= ret;
continue;
}
if (ret == SCANNED_EMPTY_SPACE) {
if (!is_empty(buf, len)) {
if (!is_last_write(c, buf, offs))
break;
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
}
empty_chkd = 1;
} else if (ret == SCANNED_EMPTY_SPACE ||
ret == SCANNED_GARBAGE ||
ret == SCANNED_A_BAD_PAD_NODE ||
ret == SCANNED_A_CORRUPT_NODE) {
dbg_rcvry("found corruption - %d", ret);
break;
}
if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
if (is_last_write(c, buf, offs)) {
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
empty_chkd = 1;
break;
}
if (ret == SCANNED_A_CORRUPT_NODE)
if (no_more_nodes(c, buf, len, lnum, offs)) {
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
empty_chkd = 1;
break;
}
if (quiet) {
/* Redo the last scan but noisily */
quiet = 0;
continue;
}
switch (ret) {
case SCANNED_GARBAGE:
dbg_err("garbage");
goto corrupted;
case SCANNED_A_CORRUPT_NODE:
case SCANNED_A_BAD_PAD_NODE:
dbg_err("bad node");
goto corrupted;
default:
dbg_err("unknown");
} else {
dbg_err("unexpected return value %d", ret);
err = -EINVAL;
goto error;
}
}
if (!empty_chkd && !is_empty(buf, len)) {
if (is_last_write(c, buf, offs)) {
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
} else {
if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
if (!is_last_write(c, buf, offs))
goto corrupted_rescan;
} else if (ret == SCANNED_A_CORRUPT_NODE) {
if (!no_more_nodes(c, buf, len, lnum, offs))
goto corrupted_rescan;
} else if (!is_empty(buf, len)) {
if (!is_last_write(c, buf, offs)) {
int corruption = first_non_ff(buf, len);
/*
......@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
}
}
/* Drop nodes from incomplete group */
if (grouped && drop_incomplete_group(sleb, &offs)) {
buf = sbuf + offs;
len = c->leb_size - offs;
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
}
min_io_unit = round_down(offs, c->min_io_size);
if (grouped)
/*
* If nodes are grouped, always drop the incomplete group at
* the end.
*/
drop_last_node(sleb, &offs, 1);
if (offs % c->min_io_size) {
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
}
/*
* While we are in the middle of the same min. I/O unit keep dropping
* nodes. So basically, what we want is to make sure that the last min.
* I/O unit where we saw the corruption is dropped completely with all
* the uncorrupted node which may possibly sit there.
*
* In other words, let's name the min. I/O unit where the corruption
* starts B, and the previous min. I/O unit A. The below code tries to
* deal with a situation when half of B contains valid nodes or the end
* of a valid node, and the second half of B contains corrupted data or
* garbage. This means that UBIFS had been writing to B just before the
* power cut happened. I do not know how realistic is this scenario
* that half of the min. I/O unit had been written successfully and the
* other half not, but this is possible in our 'failure mode emulation'
* infrastructure at least.
*
* So what is the problem, why we need to drop those nodes? Whey can't
* we just clean-up the second half of B by putting a padding node
* there? We can, and this works fine with one exception which was
* reproduced with power cut emulation testing and happens extremely
* rarely. The description follows, but it is worth noting that that is
* only about the GC head, so we could do this trick only if the bud
* belongs to the GC head, but it does not seem to be worth an
* additional "if" statement.
*
* So, imagine the file-system is full, we run GC which is moving valid
* nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
* LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
* and will try to continue. Imagine that LEB X is currently the
* dirtiest LEB, and the amount of used space in LEB Y is exactly the
* same as amount of free space in LEB X.
*
* And a power cut happens when nodes are moved from LEB X to LEB Y. We
* are here trying to recover LEB Y which is the GC head LEB. We find
* the min. I/O unit B as described above. Then we clean-up LEB Y by
* padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
* fails, because it cannot find a dirty LEB which could be GC'd into
* LEB Y! Even LEB X does not match because the amount of valid nodes
* there does not fit the free space in LEB Y any more! And this is
* because of the padding node which we added to LEB Y. The
* user-visible effect of this which I once observed and analysed is
* that we cannot mount the file-system with -ENOSPC error.
*
* So obviously, to make sure that situation does not happen we should
* free min. I/O unit B in LEB Y completely and the last used min. I/O
* unit in LEB Y should be A. This is basically what the below code
* tries to do.
*/
while (min_io_unit == round_down(offs, c->min_io_size) &&
min_io_unit != offs &&
drop_last_node(sleb, &offs, grouped));
buf = sbuf + offs;
len = c->leb_size - offs;
clean_buf(c, &buf, lnum, &offs, &len);
ubifs_end_scan(c, sleb, lnum, offs);
if (need_clean) {
err = fix_unclean_leb(c, sleb, start);
if (err)
goto error;
}
err = fix_unclean_leb(c, sleb, start);
if (err)
goto error;
return sleb;
corrupted_rescan:
/* Re-scan the corrupted data with verbose messages */
dbg_err("corruptio %d", ret);
ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
err = -EUCLEAN;
......@@ -1069,6 +1083,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
return 0;
}
/**
* grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
* @c: UBIFS file-system description object
*
* This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
* LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
* zero in case of success and a negative error code in case of failure.
*/
static int grab_empty_leb(struct ubifs_info *c)
{
int lnum, err;
/*
* Note, it is very important to first search for an empty LEB and then
* run the commit, not vice-versa. The reason is that there might be
* only one empty LEB at the moment, the one which has been the
* @c->gc_lnum just before the power cut happened. During the regular
* UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
* one but GC can grab it. But at this moment this single empty LEB is
* not marked as taken, so if we run commit - what happens? Right, the
* commit will grab it and write the index there. Remember that the
* index always expands as long as there is free space, and it only
* starts consolidating when we run out of space.
*
* IOW, if we run commit now, we might not be able to find a free LEB
* after this.
*/
lnum = ubifs_find_free_leb_for_idx(c);
if (lnum < 0) {
dbg_err("could not find an empty LEB");
dbg_dump_lprops(c);
dbg_dump_budg(c, &c->bi);
return lnum;
}
/* Reset the index flag */
err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
LPROPS_INDEX, 0);
if (err)
return err;
c->gc_lnum = lnum;
dbg_rcvry("found empty LEB %d, run commit", lnum);
return ubifs_run_commit(c);
}
/**
* ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
* @c: UBIFS file-system description object
......@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
{
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
struct ubifs_lprops lp;
int lnum, err;
int err;
dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
c->gc_lnum = -1;
if (wbuf->lnum == -1) {
dbg_rcvry("no GC head LEB");
goto find_free;
}
/*
* See whether the used space in the dirtiest LEB fits in the GC head
* LEB.
*/
if (wbuf->offs == c->leb_size) {
dbg_rcvry("no room in GC head LEB");
goto find_free;
}
if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
return grab_empty_leb(c);
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
if (err) {
/*
* There are no dirty or empty LEBs subject to here being
* enough for the index. Try to use
* 'ubifs_find_free_leb_for_idx()', which will return any empty
* LEBs (ignoring index requirements). If the index then
* doesn't have enough LEBs the recovery commit will fail -
* which is the same result anyway i.e. recovery fails. So
* there is no problem ignoring index requirements and just
* grabbing a free LEB since we have already established there
* is not a dirty LEB we could have used instead.
*/
if (err == -ENOSPC) {
dbg_rcvry("could not find a dirty LEB");
goto find_free;
}
return err;
}
ubifs_assert(!(lp.flags & LPROPS_INDEX));
lnum = lp.lnum;
if (lp.free + lp.dirty == c->leb_size) {
/* An empty LEB was returned */
if (lp.free != c->leb_size) {
err = ubifs_change_one_lp(c, lnum, c->leb_size,
0, 0, 0, 0);
if (err)
return err;
}
err = ubifs_leb_unmap(c, lnum);
if (err)
if (err != -ENOSPC)
return err;
c->gc_lnum = lnum;
dbg_rcvry("allocated LEB %d for GC", lnum);
/* Run the commit */
dbg_rcvry("committing");
return ubifs_run_commit(c);
}
/*
* There was no empty LEB so the used space in the dirtiest LEB must fit
* in the GC head LEB.
*/
if (lp.free + lp.dirty < wbuf->offs) {
dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
lnum, wbuf->lnum, wbuf->offs);
err = ubifs_return_leb(c, lnum);
if (err)
return err;
goto find_free;
dbg_rcvry("could not find a dirty LEB");
return grab_empty_leb(c);
}
ubifs_assert(!(lp.flags & LPROPS_INDEX));
ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
/*
* We run the commit before garbage collection otherwise subsequent
* mounts will see the GC and orphan deletion in a different order.
......@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
err = ubifs_run_commit(c);
if (err)
return err;
/*
* The data in the dirtiest LEB fits in the GC head LEB, so do the GC
* - use locking to keep 'ubifs_assert()' happy.
*/
dbg_rcvry("GC'ing LEB %d", lnum);
dbg_rcvry("GC'ing LEB %d", lp.lnum);
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
err = ubifs_garbage_collect_leb(c, &lp);
if (err >= 0) {
......@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
err = -EINVAL;
return err;
}
if (err != LEB_RETAINED) {
dbg_err("GC returned %d", err);
ubifs_assert(err == LEB_RETAINED);
if (err != LEB_RETAINED)
return -EINVAL;
}
err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
return err;
dbg_rcvry("allocated LEB %d for GC", lnum);
return 0;
find_free:
/*
* There is no GC head LEB or the free space in the GC head LEB is too
* small, or there are not dirty LEBs. Allocate gc_lnum by calling
* 'ubifs_find_free_leb_for_idx()' so GC is not run.
*/
lnum = ubifs_find_free_leb_for_idx(c);
if (lnum < 0) {
dbg_err("could not find an empty LEB");
return lnum;
}
/* And reset the index flag */
err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
LPROPS_INDEX, 0);
if (err)
return err;
c->gc_lnum = lnum;
dbg_rcvry("allocated LEB %d for GC", lnum);
/* Run the commit */
dbg_rcvry("committing");
return ubifs_run_commit(c);
dbg_rcvry("allocated LEB %d for GC", lp.lnum);
return 0;
}
/**
......@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
if (err)
goto out;
dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
(unsigned long)e->inum, lnum, offs, i_size, e->d_size);
return 0;
......@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
e->i_size = le64_to_cpu(ino->size);
}
}
if (e->exists && e->i_size < e->d_size) {
if (!e->inode && c->ro_mount) {
if (c->ro_mount) {
/* Fix the inode size and pin it in memory */
struct inode *inode;
struct ubifs_inode *ui;
ubifs_assert(!e->inode);
inode = ubifs_iget(c->vfs_sb, e->inum);
if (IS_ERR(inode))
return PTR_ERR(inode);
ui = ubifs_inode(inode);
if (inode->i_size < e->d_size) {
dbg_rcvry("ino %lu size %lld -> %lld",
(unsigned long)e->inum,
e->d_size, inode->i_size);
inode->i_size, e->d_size);
inode->i_size = e->d_size;
ubifs_inode(inode)->ui_size = e->d_size;
ui->ui_size = e->d_size;
ui->synced_i_size = e->d_size;
e->inode = inode;
this = rb_next(this);
continue;
......@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
iput(e->inode);
}
}
this = rb_next(this);
rb_erase(&e->rb, &c->size_tree);
kfree(e);
}
return 0;
}
......@@ -33,44 +33,32 @@
*/
#include "ubifs.h"
/*
* Replay flags.
*
* REPLAY_DELETION: node was deleted
* REPLAY_REF: node is a reference node
*/
enum {
REPLAY_DELETION = 1,
REPLAY_REF = 2,
};
#include <linux/list_sort.h>
/**
* struct replay_entry - replay tree entry.
* struct replay_entry - replay list entry.
* @lnum: logical eraseblock number of the node
* @offs: node offset
* @len: node length
* @deletion: non-zero if this entry corresponds to a node deletion
* @sqnum: node sequence number
* @flags: replay flags
* @rb: links the replay tree
* @list: links the replay list
* @key: node key
* @nm: directory entry name
* @old_size: truncation old size
* @new_size: truncation new size
* @free: amount of free space in a bud
* @dirty: amount of dirty space in a bud from padding and deletion nodes
* @jhead: journal head number of the bud
*
* UBIFS journal replay must compare node sequence numbers, which means it must
* build a tree of node information to insert into the TNC.
* The replay process first scans all buds and builds the replay list, then
* sorts the replay list in nodes sequence number order, and then inserts all
* the replay entries to the TNC.
*/
struct replay_entry {
int lnum;
int offs;
int len;
unsigned int deletion:1;
unsigned long long sqnum;
int flags;
struct rb_node rb;
struct list_head list;
union ubifs_key key;
union {
struct qstr nm;
......@@ -78,11 +66,6 @@ struct replay_entry {
loff_t old_size;
loff_t new_size;
};
struct {
int free;
int dirty;
int jhead;
};
};
};
......@@ -90,57 +73,64 @@ struct replay_entry {
* struct bud_entry - entry in the list of buds to replay.
* @list: next bud in the list
* @bud: bud description object
* @free: free bytes in the bud
* @sqnum: reference node sequence number
* @free: free bytes in the bud
* @dirty: dirty bytes in the bud
*/
struct bud_entry {
struct list_head list;
struct ubifs_bud *bud;
int free;
unsigned long long sqnum;
int free;
int dirty;
};
/**
* set_bud_lprops - set free and dirty space used by a bud.
* @c: UBIFS file-system description object
* @r: replay entry of bud
* @b: bud entry which describes the bud
*
* This function makes sure the LEB properties of bud @b are set correctly
* after the replay. Returns zero in case of success and a negative error code
* in case of failure.
*/
static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
{
const struct ubifs_lprops *lp;
int err = 0, dirty;
ubifs_get_lprops(c);
lp = ubifs_lpt_lookup_dirty(c, r->lnum);
lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
dirty = lp->dirty;
if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
/*
* The LEB was added to the journal with a starting offset of
* zero which means the LEB must have been empty. The LEB
* property values should be lp->free == c->leb_size and
* lp->dirty == 0, but that is not the case. The reason is that
* the LEB was garbage collected. The garbage collector resets
* the free and dirty space without recording it anywhere except
* lprops, so if there is not a commit then lprops does not have
* that information next time the file system is mounted.
* property values should be @lp->free == @c->leb_size and
* @lp->dirty == 0, but that is not the case. The reason is that
* the LEB had been garbage collected before it became the bud,
* and there was not commit inbetween. The garbage collector
* resets the free and dirty space without recording it
* anywhere except lprops, so if there was no commit then
* lprops does not have that information.
*
* We do not need to adjust free space because the scan has told
* us the exact value which is recorded in the replay entry as
* r->free.
* @b->free.
*
* However we do need to subtract from the dirty space the
* amount of space that the garbage collector reclaimed, which
* is the whole LEB minus the amount of space that was free.
*/
dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
dirty -= c->leb_size - lp->free;
/*
......@@ -152,10 +142,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
*/
if (dirty != 0)
dbg_msg("LEB %d lp: %d free %d dirty "
"replay: %d free %d dirty", r->lnum, lp->free,
lp->dirty, r->free, r->dirty);
"replay: %d free %d dirty", b->bud->lnum,
lp->free, lp->dirty, b->free, b->dirty);
}
lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
lp->flags | LPROPS_TAKEN, 0);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
......@@ -163,14 +153,36 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
}
/* Make sure the journal head points to the latest bud */
err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
c->leb_size - r->free, UBI_SHORTTERM);
err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
b->bud->lnum, c->leb_size - b->free,
UBI_SHORTTERM);
out:
ubifs_release_lprops(c);
return err;
}
/**
* set_buds_lprops - set free and dirty space for all replayed buds.
* @c: UBIFS file-system description object
*
* This function sets LEB properties for all replayed buds. Returns zero in
* case of success and a negative error code in case of failure.
*/
static int set_buds_lprops(struct ubifs_info *c)
{
struct bud_entry *b;
int err;
list_for_each_entry(b, &c->replay_buds, list) {
err = set_bud_lprops(c, b);
if (err)
return err;
}
return 0;
}
/**
* trun_remove_range - apply a replay entry for a truncation to the TNC.
* @c: UBIFS file-system description object
......@@ -207,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
*/
static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
int err;
dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
/* Set c->replay_sqnum to help deal with dangling branches. */
c->replay_sqnum = r->sqnum;
if (r->flags & REPLAY_REF)
err = set_bud_lprops(c, r);
else if (is_hash_key(c, &r->key)) {
if (deletion)
if (is_hash_key(c, &r->key)) {
if (r->deletion)
err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
else
err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
r->len, &r->nm);
} else {
if (deletion)
if (r->deletion)
switch (key_type(c, &r->key)) {
case UBIFS_INO_KEY:
{
......@@ -247,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
return err;
if (c->need_recovery)
err = ubifs_recover_size_accum(c, &r->key, deletion,
err = ubifs_recover_size_accum(c, &r->key, r->deletion,
r->new_size);
}
......@@ -255,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
}
/**
* destroy_replay_tree - destroy the replay.
* @c: UBIFS file-system description object
* replay_entries_cmp - compare 2 replay entries.
* @priv: UBIFS file-system description object
* @a: first replay entry
* @a: second replay entry
*
* Destroy the replay tree.
* This is a comparios function for 'list_sort()' which compares 2 replay
* entries @a and @b by comparing their sequence numer. Returns %1 if @a has
* greater sequence number and %-1 otherwise.
*/
static void destroy_replay_tree(struct ubifs_info *c)
static int replay_entries_cmp(void *priv, struct list_head *a,
struct list_head *b)
{
struct rb_node *this = c->replay_tree.rb_node;
struct replay_entry *r;
while (this) {
if (this->rb_left) {
this = this->rb_left;
continue;
} else if (this->rb_right) {
this = this->rb_right;
continue;
}
r = rb_entry(this, struct replay_entry, rb);
this = rb_parent(this);
if (this) {
if (this->rb_left == &r->rb)
this->rb_left = NULL;
else
this->rb_right = NULL;
}
if (is_hash_key(c, &r->key))
kfree(r->nm.name);
kfree(r);
}
c->replay_tree = RB_ROOT;
struct replay_entry *ra, *rb;
cond_resched();
if (a == b)
return 0;
ra = list_entry(a, struct replay_entry, list);
rb = list_entry(b, struct replay_entry, list);
ubifs_assert(ra->sqnum != rb->sqnum);
if (ra->sqnum > rb->sqnum)
return 1;
return -1;
}
/**
* apply_replay_tree - apply the replay tree to the TNC.
* apply_replay_list - apply the replay list to the TNC.
* @c: UBIFS file-system description object
*
* Apply the replay tree.
* Returns zero in case of success and a negative error code in case of
* failure.
* Apply all entries in the replay list to the TNC. Returns zero in case of
* success and a negative error code in case of failure.
*/
static int apply_replay_tree(struct ubifs_info *c)
static int apply_replay_list(struct ubifs_info *c)
{
struct rb_node *this = rb_first(&c->replay_tree);
struct replay_entry *r;
int err;
while (this) {
struct replay_entry *r;
int err;
list_sort(c, &c->replay_list, &replay_entries_cmp);
list_for_each_entry(r, &c->replay_list, list) {
cond_resched();
r = rb_entry(this, struct replay_entry, rb);
err = apply_replay_entry(c, r);
if (err)
return err;
this = rb_next(this);
}
return 0;
}
/**
* insert_node - insert a node to the replay tree.
* destroy_replay_list - destroy the replay.
* @c: UBIFS file-system description object
*
* Destroy the replay list.
*/
static void destroy_replay_list(struct ubifs_info *c)
{
struct replay_entry *r, *tmp;
list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
if (is_hash_key(c, &r->key))
kfree(r->nm.name);
list_del(&r->list);
kfree(r);
}
}
/**
* insert_node - insert a node to the replay list
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
......@@ -328,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
* @old_size: truncation old size
* @new_size: truncation new size
*
* This function inserts a scanned non-direntry node to the replay tree. The
* replay tree is an RB-tree containing @struct replay_entry elements which are
* indexed by the sequence number. The replay tree is applied at the very end
* of the replay process. Since the tree is sorted in sequence number order,
* the older modifications are applied first. This function returns zero in
* case of success and a negative error code in case of failure.
* This function inserts a scanned non-direntry node to the replay list. The
* replay list contains @struct replay_entry elements, and we sort this list in
* sequence number order before applying it. The replay list is applied at the
* very end of the replay process. Since the list is sorted in sequence number
* order, the older modifications are applied first. This function returns zero
* in case of success and a negative error code in case of failure.
*/
static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, unsigned long long sqnum,
int deletion, int *used, loff_t old_size,
loff_t new_size)
{
struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
while (*p) {
parent = *p;
r = rb_entry(parent, struct replay_entry, rb);
if (sqnum < r->sqnum) {
p = &(*p)->rb_left;
continue;
} else if (sqnum > r->sqnum) {
p = &(*p)->rb_right;
continue;
}
ubifs_err("duplicate sqnum in replay");
return -EINVAL;
}
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
......@@ -370,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
r->deletion = !!deletion;
r->sqnum = sqnum;
r->flags = (deletion ? REPLAY_DELETION : 0);
key_copy(c, key, &r->key);
r->old_size = old_size;
r->new_size = new_size;
key_copy(c, key, &r->key);
rb_link_node(&r->rb, parent, p);
rb_insert_color(&r->rb, &c->replay_tree);
list_add_tail(&r->list, &c->replay_list);
return 0;
}
/**
* insert_dent - insert a directory entry node into the replay tree.
* insert_dent - insert a directory entry node into the replay list.
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
......@@ -394,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
* @deletion: non-zero if this is a deletion
* @used: number of bytes in use in a LEB
*
* This function inserts a scanned directory entry node to the replay tree.
* Returns zero in case of success and a negative error code in case of
* failure.
*
* This function is also used for extended attribute entries because they are
* implemented as directory entry nodes.
* This function inserts a scanned directory entry node or an extended
* attribute entry to the replay list. Returns zero in case of success and a
* negative error code in case of failure.
*/
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, const char *name, int nlen,
unsigned long long sqnum, int deletion, int *used)
{
struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
char *nbuf;
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
while (*p) {
parent = *p;
r = rb_entry(parent, struct replay_entry, rb);
if (sqnum < r->sqnum) {
p = &(*p)->rb_left;
continue;
}
if (sqnum > r->sqnum) {
p = &(*p)->rb_right;
continue;
}
ubifs_err("duplicate sqnum in replay");
return -EINVAL;
}
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
nbuf = kmalloc(nlen + 1, GFP_KERNEL);
if (!nbuf) {
kfree(r);
......@@ -442,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
r->deletion = !!deletion;
r->sqnum = sqnum;
key_copy(c, key, &r->key);
r->nm.len = nlen;
memcpy(nbuf, name, nlen);
nbuf[nlen] = '\0';
r->nm.name = nbuf;
r->flags = (deletion ? REPLAY_DELETION : 0);
key_copy(c, key, &r->key);
ubifs_assert(!*p);
rb_link_node(&r->rb, parent, p);
rb_insert_color(&r->rb, &c->replay_tree);
list_add_tail(&r->list, &c->replay_list);
return 0;
}
......@@ -488,30 +472,93 @@ int ubifs_validate_entry(struct ubifs_info *c,
return 0;
}
/**
* is_last_bud - check if the bud is the last in the journal head.
* @c: UBIFS file-system description object
* @bud: bud description object
*
* This function checks if bud @bud is the last bud in its journal head. This
* information is then used by 'replay_bud()' to decide whether the bud can
* have corruptions or not. Indeed, only last buds can be corrupted by power
* cuts. Returns %1 if this is the last bud, and %0 if not.
*/
static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
{
struct ubifs_jhead *jh = &c->jheads[bud->jhead];
struct ubifs_bud *next;
uint32_t data;
int err;
if (list_is_last(&bud->list, &jh->buds_list))
return 1;
/*
* The following is a quirk to make sure we work correctly with UBIFS
* images used with older UBIFS.
*
* Normally, the last bud will be the last in the journal head's list
* of bud. However, there is one exception if the UBIFS image belongs
* to older UBIFS. This is fairly unlikely: one would need to use old
* UBIFS, then have a power cut exactly at the right point, and then
* try to mount this image with new UBIFS.
*
* The exception is: it is possible to have 2 buds A and B, A goes
* before B, and B is the last, bud B is contains no data, and bud A is
* corrupted at the end. The reason is that in older versions when the
* journal code switched the next bud (from A to B), it first added a
* log reference node for the new bud (B), and only after this it
* synchronized the write-buffer of current bud (A). But later this was
* changed and UBIFS started to always synchronize the write-buffer of
* the bud (A) before writing the log reference for the new bud (B).
*
* But because older UBIFS always synchronized A's write-buffer before
* writing to B, we can recognize this exceptional situation but
* checking the contents of bud B - if it is empty, then A can be
* treated as the last and we can recover it.
*
* TODO: remove this piece of code in a couple of years (today it is
* 16.05.2011).
*/
next = list_entry(bud->list.next, struct ubifs_bud, list);
if (!list_is_last(&next->list, &jh->buds_list))
return 0;
err = ubi_read(c->ubi, next->lnum, (char *)&data,
next->start, 4);
if (err)
return 0;
return data == 0xFFFFFFFF;
}
/**
* replay_bud - replay a bud logical eraseblock.
* @c: UBIFS file-system description object
* @lnum: bud logical eraseblock number to replay
* @offs: bud start offset
* @jhead: journal head to which this bud belongs
* @free: amount of free space in the bud is returned here
* @dirty: amount of dirty space from padding and deletion nodes is returned
* here
* @b: bud entry which describes the bud
*
* This function returns zero in case of success and a negative error code in
* case of failure.
* This function replays bud @bud, recovers it if needed, and adds all nodes
* from this bud to the replay list. Returns zero in case of success and a
* negative error code in case of failure.
*/
static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
int *free, int *dirty)
static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
{
int err = 0, used = 0;
int is_last = is_last_bud(c, b->bud);
int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
struct ubifs_bud *bud;
dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
if (c->need_recovery)
sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
lnum, b->bud->jhead, offs, is_last);
if (c->need_recovery && is_last)
/*
* Recover only last LEBs in the journal heads, because power
* cuts may cause corruptions only in these LEBs, because only
* these LEBs could possibly be written to at the power cut
* time.
*/
sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
b->bud->jhead != GCHD);
else
sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
if (IS_ERR(sleb))
......@@ -627,15 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
goto out;
}
bud = ubifs_search_bud(c, lnum);
if (!bud)
BUG();
ubifs_assert(ubifs_search_bud(c, lnum));
ubifs_assert(sleb->endpt - offs >= used);
ubifs_assert(sleb->endpt % c->min_io_size == 0);
*dirty = sleb->endpt - offs - used;
*free = c->leb_size - sleb->endpt;
b->dirty = sleb->endpt - offs - used;
b->free = c->leb_size - sleb->endpt;
dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
out:
ubifs_scan_destroy(sleb);
......@@ -648,58 +693,6 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
return -EINVAL;
}
/**
* insert_ref_node - insert a reference node to the replay tree.
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
* @sqnum: sequence number
* @free: amount of free space in bud
* @dirty: amount of dirty space from padding and deletion nodes
* @jhead: journal head number for the bud
*
* This function inserts a reference node to the replay tree and returns zero
* in case of success or a negative error code in case of failure.
*/
static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
unsigned long long sqnum, int free, int dirty,
int jhead)
{
struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
dbg_mnt("add ref LEB %d:%d", lnum, offs);
while (*p) {
parent = *p;
r = rb_entry(parent, struct replay_entry, rb);
if (sqnum < r->sqnum) {
p = &(*p)->rb_left;
continue;
} else if (sqnum > r->sqnum) {
p = &(*p)->rb_right;
continue;
}
ubifs_err("duplicate sqnum in replay tree");
return -EINVAL;
}
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
r->lnum = lnum;
r->offs = offs;
r->sqnum = sqnum;
r->flags = REPLAY_REF;
r->free = free;
r->dirty = dirty;
r->jhead = jhead;
rb_link_node(&r->rb, parent, p);
rb_insert_color(&r->rb, &c->replay_tree);
return 0;
}
/**
* replay_buds - replay all buds.
* @c: UBIFS file-system description object
......@@ -710,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
static int replay_buds(struct ubifs_info *c)
{
struct bud_entry *b;
int err, uninitialized_var(free), uninitialized_var(dirty);
int err;
unsigned long long prev_sqnum = 0;
list_for_each_entry(b, &c->replay_buds, list) {
err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
&free, &dirty);
if (err)
return err;
err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
free, dirty, b->bud->jhead);
err = replay_bud(c, b);
if (err)
return err;
ubifs_assert(b->sqnum > prev_sqnum);
prev_sqnum = b->sqnum;
}
return 0;
......@@ -1060,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
if (err)
goto out;
err = apply_replay_tree(c);
err = apply_replay_list(c);
if (err)
goto out;
err = set_buds_lprops(c);
if (err)
goto out;
/*
* UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
* to roughly estimate index growth. Things like @c->min_idx_lebs
* UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
* to roughly estimate index growth. Things like @c->bi.min_idx_lebs
* depend on it. This means we have to initialize it to make sure
* budgeting works properly.
*/
c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
c->budg_uncommitted_idx *= c->max_idx_node_sz;
c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
c->bi.uncommitted_idx *= c->max_idx_node_sz;
ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
(unsigned long)c->highest_inum);
out:
destroy_replay_tree(c);
destroy_replay_list(c);
destroy_bud_list(c);
c->replaying = 0;
return err;
......
......@@ -475,7 +475,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
* @c: UBIFS file-system description object
*
* This function returns a pointer to the superblock node or a negative error
* code.
* code. Note, the user of this function is responsible of kfree()'ing the
* returned superblock buffer.
*/
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
{
......@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
memcpy(&c->uuid, &sup->uuid, 16);
c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
/* Automatically increase file system size to the maximum size */
c->old_leb_cnt = c->leb_cnt;
......@@ -650,3 +652,152 @@ int ubifs_read_superblock(struct ubifs_info *c)
kfree(sup);
return err;
}
/**
* fixup_leb - fixup/unmap an LEB containing free space.
* @c: UBIFS file-system description object
* @lnum: the LEB number to fix up
* @len: number of used bytes in LEB (starting at offset 0)
*
* This function reads the contents of the given LEB number @lnum, then fixes
* it up, so that empty min. I/O units in the end of LEB are actually erased on
* flash (rather than being just all-0xff real data). If the LEB is completely
* empty, it is simply unmapped.
*/
static int fixup_leb(struct ubifs_info *c, int lnum, int len)
{
int err;
ubifs_assert(len >= 0);
ubifs_assert(len % c->min_io_size == 0);
ubifs_assert(len < c->leb_size);
if (len == 0) {
dbg_mnt("unmap empty LEB %d", lnum);
return ubi_leb_unmap(c->ubi, lnum);
}
dbg_mnt("fixup LEB %d, data len %d", lnum, len);
err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
if (err)
return err;
return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
}
/**
* fixup_free_space - find & remap all LEBs containing free space.
* @c: UBIFS file-system description object
*
* This function walks through all LEBs in the filesystem and fiexes up those
* containing free/empty space.
*/
static int fixup_free_space(struct ubifs_info *c)
{
int lnum, err = 0;
struct ubifs_lprops *lprops;
ubifs_get_lprops(c);
/* Fixup LEBs in the master area */
for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
if (err)
goto out;
}
/* Unmap unused log LEBs */
lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
while (lnum != c->ltail_lnum) {
err = fixup_leb(c, lnum, 0);
if (err)
goto out;
lnum = ubifs_next_log_lnum(c, lnum);
}
/* Fixup the current log head */
err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
if (err)
goto out;
/* Fixup LEBs in the LPT area */
for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
int free = c->ltab[lnum - c->lpt_first].free;
if (free > 0) {
err = fixup_leb(c, lnum, c->leb_size - free);
if (err)
goto out;
}
}
/* Unmap LEBs in the orphans area */
for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
err = fixup_leb(c, lnum, 0);
if (err)
goto out;
}
/* Fixup LEBs in the main area */
for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
lprops = ubifs_lpt_lookup(c, lnum);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
goto out;
}
if (lprops->free > 0) {
err = fixup_leb(c, lnum, c->leb_size - lprops->free);
if (err)
goto out;
}
}
out:
ubifs_release_lprops(c);
return err;
}
/**
* ubifs_fixup_free_space - find & fix all LEBs with free space.
* @c: UBIFS file-system description object
*
* This function fixes up LEBs containing free space on first mount, if the
* appropriate flag was set when the FS was created. Each LEB with one or more
* empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
* the free space is actually erased. E.g., this is necessary for some NAND
* chips, since the free space may have been programmed like real "0xff" data
* (generating a non-0xff ECC), causing future writes to the not-really-erased
* NAND pages to behave badly. After the space is fixed up, the superblock flag
* is cleared, so that this is skipped for all future mounts.
*/
int ubifs_fixup_free_space(struct ubifs_info *c)
{
int err;
struct ubifs_sb_node *sup;
ubifs_assert(c->space_fixup);
ubifs_assert(!c->ro_mount);
ubifs_msg("start fixing up free space");
err = fixup_free_space(c);
if (err)
return err;
sup = ubifs_read_sb_node(c);
if (IS_ERR(sup))
return PTR_ERR(sup);
/* Free-space fixup is no longer required */
c->space_fixup = 0;
sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
err = ubifs_write_sb_node(c, sup);
kfree(sup);
if (err)
return err;
ubifs_msg("free space fixup complete");
return err;
}
......@@ -375,7 +375,7 @@ static void ubifs_evict_inode(struct inode *inode)
ubifs_release_dirty_inode_budget(c, ui);
else {
/* We've deleted something - clean the "no space" flags */
c->nospace = c->nospace_rp = 0;
c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
done:
......@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
* be compressed and direntries are of the maximum size.
*
* Note, data, which may be stored in inodes is budgeted separately, so
* it is not included into 'c->inode_budget'.
* it is not included into 'c->bi.inode_budget'.
*/
c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
c->inode_budget = UBIFS_INO_NODE_SZ;
c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
c->bi.inode_budget = UBIFS_INO_NODE_SZ;
c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
/*
* When the amount of flash space used by buds becomes
......@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
{
long long tmp64;
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->report_rp_size = ubifs_reported_space(c, c->rp_size);
/*
......@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
{
ubifs_assert(c->dark_wm > 0);
if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
ubifs_err("insufficient free space to mount in read/write mode");
dbg_dump_budg(c);
ubifs_err("insufficient free space to mount in R/W mode");
dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
return -ENOSPC;
}
......@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_lpt;
err = dbg_check_idx_size(c, c->old_idx_sz);
err = dbg_check_idx_size(c, c->bi.old_idx_sz);
if (err)
goto out_lpt;
......@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_journal;
/* Calculate 'min_idx_lebs' after journal replay */
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
if (err)
......@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
} else
ubifs_assert(c->lst.taken_empty_lebs > 0);
if (!c->ro_mount && c->space_fixup) {
err = ubifs_fixup_free_space(c);
if (err)
goto out_infos;
}
err = dbg_check_filesystem(c);
if (err)
goto out_infos;
......@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
c->main_lebs, c->main_first, c->leb_cnt - 1);
dbg_msg("index LEBs: %d", c->lst.idx_lebs);
dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
c->bi.old_idx_sz >> 20);
dbg_msg("key hash type: %d", c->key_hash_type);
dbg_msg("tree fanout: %d", c->fanout);
dbg_msg("reserved GC LEB: %d", c->gc_lnum);
......@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
dbg_msg("dead watermark: %d", c->dead_wm);
dbg_msg("dark watermark: %d", c->dark_wm);
......@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
sup->leb_cnt = cpu_to_le32(c->leb_cnt);
err = ubifs_write_sb_node(c, sup);
kfree(sup);
if (err)
goto out;
}
......@@ -1684,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
*/
err = dbg_check_space_info(c);
}
if (c->space_fixup) {
err = ubifs_fixup_free_space(c);
if (err)
goto out;
}
mutex_unlock(&c->umount_mutex);
return err;
......@@ -1766,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
* to write them back because of I/O errors.
*/
if (!c->ro_error) {
ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
ubifs_assert(c->budg_idx_growth == 0);
ubifs_assert(c->budg_dd_growth == 0);
ubifs_assert(c->budg_data_growth == 0);
ubifs_assert(c->bi.idx_growth == 0);
ubifs_assert(c->bi.dd_growth == 0);
ubifs_assert(c->bi.data_growth == 0);
}
/*
......
......@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
if (err) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
znode = dirty_cow_bottom_up(c, znode);
if (IS_ERR(znode)) {
err = PTR_ERR(znode);
goto out_unlock;
}
znode = dirty_cow_bottom_up(c, znode);
if (IS_ERR(znode)) {
err = PTR_ERR(znode);
goto out_unlock;
}
}
err = tnc_delete(c, znode, n);
}
......
......@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
c->gap_lebs = NULL;
return err;
}
if (!dbg_force_in_the_gaps_enabled) {
if (dbg_force_in_the_gaps_enabled()) {
/*
* Do not print scary warnings if the debugging
* option which forces in-the-gaps is enabled.
*/
ubifs_err("out of space");
spin_lock(&c->space_lock);
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
ubifs_warn("out of space");
dbg_dump_budg(c, &c->bi);
dbg_dump_lprops(c);
}
/* Try to commit anyway */
......@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
spin_lock(&c->space_lock);
/*
* Although we have not finished committing yet, update size of the
* committed index ('c->old_idx_sz') and zero out the index growth
* committed index ('c->bi.old_idx_sz') and zero out the index growth
* budget. It is OK to do this now, because we've reserved all the
* space which is needed to commit the index, and it is save for the
* budgeting subsystem to assume the index is already committed,
* even though it is not.
*/
ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
c->old_idx_sz = c->calc_idx_sz;
c->budg_uncommitted_idx = 0;
c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
c->bi.old_idx_sz = c->calc_idx_sz;
c->bi.uncommitted_idx = 0;
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
mutex_unlock(&c->tnc_mutex);
......
......@@ -408,9 +408,11 @@ enum {
* Superblock flags.
*
* UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
* UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
*/
enum {
UBIFS_FLG_BIGLPT = 0x02,
UBIFS_FLG_SPACE_FIXUP = 0x04,
};
/**
......@@ -434,7 +436,7 @@ struct ubifs_ch {
__u8 node_type;
__u8 group_type;
__u8 padding[2];
} __attribute__ ((packed));
} __packed;
/**
* union ubifs_dev_desc - device node descriptor.
......@@ -448,7 +450,7 @@ struct ubifs_ch {
union ubifs_dev_desc {
__le32 new;
__le64 huge;
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_ino_node - inode node.
......@@ -509,7 +511,7 @@ struct ubifs_ino_node {
__le16 compr_type;
__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
__u8 data[];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_dent_node - directory entry node.
......@@ -534,7 +536,7 @@ struct ubifs_dent_node {
__le16 nlen;
__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
__u8 name[];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_data_node - data node.
......@@ -555,7 +557,7 @@ struct ubifs_data_node {
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_trun_node - truncation node.
......@@ -575,7 +577,7 @@ struct ubifs_trun_node {
__u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
__le64 old_size;
__le64 new_size;
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_pad_node - padding node.
......@@ -586,7 +588,7 @@ struct ubifs_trun_node {
struct ubifs_pad_node {
struct ubifs_ch ch;
__le32 pad_len;
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_sb_node - superblock node.
......@@ -644,7 +646,7 @@ struct ubifs_sb_node {
__u8 uuid[16];
__le32 ro_compat_version;
__u8 padding2[3968];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_mst_node - master node.
......@@ -711,7 +713,7 @@ struct ubifs_mst_node {
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_ref_node - logical eraseblock reference node.
......@@ -727,7 +729,7 @@ struct ubifs_ref_node {
__le32 offs;
__le32 jhead;
__u8 padding[28];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_branch - key/reference/length branch
......@@ -741,7 +743,7 @@ struct ubifs_branch {
__le32 offs;
__le32 len;
__u8 key[];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_idx_node - indexing node.
......@@ -755,7 +757,7 @@ struct ubifs_idx_node {
__le16 child_cnt;
__le16 level;
__u8 branches[];
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_cs_node - commit start node.
......@@ -765,7 +767,7 @@ struct ubifs_idx_node {
struct ubifs_cs_node {
struct ubifs_ch ch;
__le64 cmt_no;
} __attribute__ ((packed));
} __packed;
/**
* struct ubifs_orph_node - orphan node.
......@@ -777,6 +779,6 @@ struct ubifs_orph_node {
struct ubifs_ch ch;
__le64 cmt_no;
__le64 inos[];
} __attribute__ ((packed));
} __packed;
#endif /* __UBIFS_MEDIA_H__ */
......@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
* cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
* with 'ubifs_writepage()' (see file.c). All the other inode fields are
* changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
* deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
* are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
*/
struct ubifs_inode {
......@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
unsigned int compr_type:2;
};
/**
* struct ubifs_budg_info - UBIFS budgeting information.
* @idx_growth: amount of bytes budgeted for index growth
* @data_growth: amount of bytes budgeted for cached data
* @dd_growth: amount of bytes budgeted for cached data that will make
* other data dirty
* @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
* which still have to be taken into account because the index
* has not been committed so far
* @old_idx_sz: size of index on flash
* @min_idx_lebs: minimum number of LEBs required for the index
* @nospace: non-zero if the file-system does not have flash space (used as
* optimization)
* @nospace_rp: the same as @nospace, but additionally means that even reserved
* pool is full
* @page_budget: budget for a page (constant, nenver changed after mount)
* @inode_budget: budget for an inode (constant, nenver changed after mount)
* @dent_budget: budget for a directory entry (constant, nenver changed after
* mount)
*/
struct ubifs_budg_info {
long long idx_growth;
long long data_growth;
long long dd_growth;
long long uncommitted_idx;
unsigned long long old_idx_sz;
int min_idx_lebs;
unsigned int nospace:1;
unsigned int nospace_rp:1;
int page_budget;
int inode_budget;
int dent_budget;
};
struct ubifs_debug_info;
/**
......@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
* @cmt_wq: wait queue to sleep on if the log is full and a commit is running
*
* @big_lpt: flag that LPT is too big to write whole during commit
* @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
* @no_chk_data_crc: do not check CRCs when reading data nodes (except during
* recovery)
* @bulk_read: enable bulk-reads
......@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
* @dirty_zn_cnt: number of dirty znodes
* @clean_zn_cnt: number of clean znodes
*
* @budg_idx_growth: amount of bytes budgeted for index growth
* @budg_data_growth: amount of bytes budgeted for cached data
* @budg_dd_growth: amount of bytes budgeted for cached data that will make
* other data dirty
* @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
* but which still have to be taken into account because
* the index has not been committed so far
* @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
* @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
* @nospace, and @nospace_rp;
* @min_idx_lebs: minimum number of LEBs required for the index
* @old_idx_sz: size of index on flash
* @space_lock: protects @bi and @lst
* @lst: lprops statistics
* @bi: budgeting information
* @calc_idx_sz: temporary variable which is used to calculate new index size
* (contains accurate new index size at end of TNC commit start)
* @lst: lprops statistics
* @nospace: non-zero if the file-system does not have flash space (used as
* optimization)
* @nospace_rp: the same as @nospace, but additionally means that even reserved
* pool is full
*
* @page_budget: budget for a page
* @inode_budget: budget for an inode
* @dent_budget: budget for a directory entry
*
* @ref_node_alsz: size of the LEB reference node aligned to the min. flash
* I/O unit
* I/O unit
* @mst_node_alsz: master node aligned size
* @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
......@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
* @replaying: %1 during journal replay
* @mounting: %1 while mounting
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_tree: temporary tree used during journal replay
* @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node)
......@@ -1238,6 +1254,7 @@ struct ubifs_info {
wait_queue_head_t cmt_wq;
unsigned int big_lpt:1;
unsigned int space_fixup:1;
unsigned int no_chk_data_crc:1;
unsigned int bulk_read:1;
unsigned int default_compr:2;
......@@ -1308,21 +1325,10 @@ struct ubifs_info {
atomic_long_t dirty_zn_cnt;
atomic_long_t clean_zn_cnt;
long long budg_idx_growth;
long long budg_data_growth;
long long budg_dd_growth;
long long budg_uncommitted_idx;
spinlock_t space_lock;
int min_idx_lebs;
unsigned long long old_idx_sz;
unsigned long long calc_idx_sz;
struct ubifs_lp_stats lst;
unsigned int nospace:1;
unsigned int nospace_rp:1;
int page_budget;
int inode_budget;
int dent_budget;
struct ubifs_budg_info bi;
unsigned long long calc_idx_sz;
int ref_node_alsz;
int mst_node_alsz;
......@@ -1430,7 +1436,6 @@ struct ubifs_info {
unsigned int replaying:1;
unsigned int mounting:1;
unsigned int remounting_rw:1;
struct rb_root replay_tree;
struct list_head replay_list;
struct list_head replay_buds;
unsigned long long cs_sqnum;
......@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
int ubifs_read_superblock(struct ubifs_info *c);
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
int ubifs_fixup_free_space(struct ubifs_info *c);
/* replay.c */
int ubifs_validate_entry(struct ubifs_info *c,
......
......@@ -80,8 +80,8 @@ enum {
SECURITY_XATTR,
};
static const struct inode_operations none_inode_operations;
static const struct file_operations none_file_operations;
static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
/**
* create_xattr - create an extended attribute.
......@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
/* Re-define all operations to be "nothing" */
inode->i_mapping->a_ops = &empty_aops;
inode->i_op = &none_inode_operations;
inode->i_fop = &none_file_operations;
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
ui = ubifs_inode(inode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment