Commit b975dee3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6

* 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6:
  UBIFS: make minimum fanout 3
  UBIFS: fix division by zero
  UBIFS: amend f_fsid
  UBIFS: fill f_fsid
  UBIFS: improve statfs reporting even more
  UBIFS: introduce LEB overhead
  UBIFS: add forgotten gc_idx_lebs component
  UBIFS: fix assertion
  UBIFS: improve statfs reporting
  UBIFS: remove incorrect index space check
  UBIFS: push empty flash hack down
  UBIFS: do not update min_idx_lebs in stafs
  UBIFS: allow for racing between GC and TNC
  UBIFS: always read hashed-key nodes under TNC mutex
  UBIFS: fix zero-length truncations
parents deac93df a5cb562d
......@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
int subtract_lebs;
long long available;
/*
* Force the amount available to the total size reported if the used
* space is zero.
*/
if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
c->budg_data_growth + c->budg_dd_growth == 0) {
/* Do the same calculation as for c->block_cnt */
available = c->main_lebs - 2;
available *= c->leb_size - c->dark_wm;
return available;
}
available = c->main_bytes - c->lst.total_used;
/*
......@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
}
/**
* ubifs_budg_get_free_space - return amount of free space.
* ubifs_reported_space - calculate reported free space.
* @c: the UBIFS file-system description object
* @free: amount of free space
*
* This function calculates amount of free space which will be reported to
* user-space. User-space application tend to expect that if the file-system
* (e.g., via the 'statfs()' call) reports that it has N bytes available, they
* are able to write a file of size N. UBIFS attaches node headers to each data
* node and it has to write indexind nodes as well. This introduces additional
* overhead, and UBIFS it has to report sligtly less free space to meet the
* above expectetion.
*
* This function assumes free space is made up of uncompressed data nodes and
* full index nodes (one per data node, tripled because we always allow enough
* space to write the index thrice).
*
* Note, the calculation is pessimistic, which means that most of the time
* UBIFS reports less space than it actually has.
*/
long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
{
int divisor, factor, f;
/*
* Reported space size is @free * X, where X is UBIFS block size
* divided by UBIFS block size + all overhead one data block
* introduces. The overhead is the node header + indexing overhead.
*
* Indexing overhead calculations are based on the following formula:
* I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
* of data nodes, f - fanout. Because effective UBIFS fanout is twice
* as less than maximum fanout, we assume that each data node
* introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
* Note, the multiplier 3 is because UBIFS reseves thrice as more space
* for the index.
*/
f = c->fanout > 3 ? c->fanout >> 1 : 2;
factor = UBIFS_BLOCK_SIZE;
divisor = UBIFS_MAX_DATA_NODE_SZ;
divisor += (c->max_idx_node_sz * 3) / (f - 1);
free *= factor;
do_div(free, divisor);
return free;
}
/**
* ubifs_get_free_space - return amount of free space.
* @c: UBIFS file-system description object
*
* This function returns amount of free space on the file-system.
* This function calculates amount of free space to report to user-space.
*
* Because UBIFS may introduce substantial overhead (the index, node headers,
* alighment, wastage at the end of eraseblocks, etc), it cannot report real
* amount of free flash space it has (well, because not all dirty space is
* reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
* it would bread user expectetion about what free space is. Users seem to
* accustomed to assume that if the file-system reports N bytes of free space,
* they would be able to fit a file of N bytes to the FS. This almost works for
* traditional file-systems, because they have way less overhead than UBIFS.
* So, to keep users happy, UBIFS tries to take the overhead into account.
*/
long long ubifs_budg_get_free_space(struct ubifs_info *c)
long long ubifs_get_free_space(struct ubifs_info *c)
{
int min_idx_lebs, rsvd_idx_lebs;
int min_idx_lebs, rsvd_idx_lebs, lebs;
long long available, outstanding, free;
/* Do exactly the same calculations as in 'do_budget_space()' */
spin_lock(&c->space_lock);
min_idx_lebs = ubifs_calc_min_idx_lebs(c);
outstanding = c->budg_data_growth + c->budg_dd_growth;
if (min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
- c->lst.taken_empty_lebs) {
/*
* Force the amount available to the total size reported if the used
* space is zero.
*/
if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
spin_unlock(&c->space_lock);
return 0;
return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
}
available = ubifs_calc_available(c, min_idx_lebs);
outstanding = c->budg_data_growth + c->budg_dd_growth;
c->min_idx_lebs = min_idx_lebs;
/*
* When reporting free space to user-space, UBIFS guarantees that it is
* possible to write a file of free space size. This means that for
* empty LEBs we may use more precise calculations than
* 'ubifs_calc_available()' is using. Namely, we know that in empty
* LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
* Thus, amend the available space.
*
* Note, the calculations below are similar to what we have in
* 'do_budget_space()', so refer there for comments.
*/
if (min_idx_lebs > c->lst.idx_lebs)
rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
lebs -= rsvd_idx_lebs;
available += lebs * (c->dark_wm - c->leb_overhead);
spin_unlock(&c->space_lock);
if (available > outstanding)
......
......@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
if (err) {
if (err != -ENOSPC)
return err;
err = 0;
budgeted = 0;
}
......
......@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
int err;
struct ubifs_budget_req req;
loff_t old_size = inode->i_size, new_size = attr->ia_size;
int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
struct ubifs_inode *ui = ubifs_inode(inode);
dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
......@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
/* A funny way to budget for truncation node */
req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
err = ubifs_budget_space(c, &req);
if (err)
return err;
if (err) {
/*
* Treat truncations to zero as deletion and always allow them,
* just like we do for '->unlink()'.
*/
if (new_size || err != -ENOSPC)
return err;
budgeted = 0;
}
err = vmtruncate(inode, new_size);
if (err)
......@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
mutex_unlock(&ui->ui_mutex);
out_budg:
ubifs_release_budget(c, &req);
if (budgeted)
ubifs_release_budget(c, &req);
else {
c->nospace = c->nospace_rp = 0;
smp_wmb();
}
return err;
}
......
......@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
* dirty index heap, and it falls-back to LPT scanning if the heaps are empty
* or do not have an LEB which satisfies the @min_space criteria.
*
* Note:
* o LEBs which have less than dead watermark of dirty space are never picked
* by this function;
*
* Returns zero and the LEB properties of
* found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
* negative error code in case of other failures. The returned LEB is marked as
* "taken".
* Note, LEBs which have less than dead watermark of free + dirty space are
* never picked by this function.
*
* The additional @pick_free argument controls if this function has to return a
* free or freeable LEB if one is present. For example, GC must to set it to %1,
......@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
*
* In addition @pick_free is set to %2 by the recovery process in order to
* recover gc_lnum in which case an index LEB must not be returned.
*
* This function returns zero and the LEB properties of found dirty LEB in case
* of success, %-ENOSPC if no dirty LEB was found and a negative error code in
* case of other failures. The returned LEB is marked as "taken".
*/
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int min_space, int pick_free)
......@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int lebs, rsvd_idx_lebs = 0;
spin_lock(&c->space_lock);
lebs = c->lst.empty_lebs;
lebs = c->lst.empty_lebs + c->idx_gc_cnt;
lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
/*
......@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
lp = idx_lp;
if (lp) {
ubifs_assert(lp->dirty >= c->dead_wm);
ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
goto found;
}
......
......@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
if (err)
goto out;
/* Allow for races with TNC */
c->gced_lnum = lnum;
smp_wmb();
c->gc_seq += 1;
smp_wmb();
if (c->gc_lnum == -1) {
c->gc_lnum = lnum;
err = LEB_RETAINED;
......
......@@ -283,38 +283,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
return (void *)((struct ubifs_branch *)idx->branches)->key;
}
/**
* ubifs_reported_space - calculate reported free space.
* @c: the UBIFS file-system description object
* @free: amount of free space
*
* This function calculates amount of free space which will be reported to
* user-space. User-space application tend to expect that if the file-system
* (e.g., via the 'statfs()' call) reports that it has N bytes available, they
* are able to write a file of size N. UBIFS attaches node headers to each data
* node and it has to write indexind nodes as well. This introduces additional
* overhead, and UBIFS it has to report sligtly less free space to meet the
* above expectetion.
*
* This function assumes free space is made up of uncompressed data nodes and
* full index nodes (one per data node, doubled because we always allow enough
* space to write the index twice).
*
* Note, the calculation is pessimistic, which means that most of the time
* UBIFS reports less space than it actually has.
*/
static inline long long ubifs_reported_space(const struct ubifs_info *c,
uint64_t free)
{
int divisor, factor;
divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
do_div(free, divisor);
return free * factor;
}
/**
* ubifs_current_time - round current time to time granularity.
* @inode: inode
......@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
}
/**
* ubifs_tnc_lookup - look up a file-system node.
* @c: UBIFS file-system description object
* @key: node key to lookup
* @node: the node is returned here
*
* This function look up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
* case of failure.
*/
static inline int ubifs_tnc_lookup(struct ubifs_info *c,
const union ubifs_key *key, void *node)
{
return ubifs_tnc_locate(c, key, node, NULL, NULL);
}
#endif /* __UBIFS_MISC_H__ */
......@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct ubifs_info *c = dentry->d_sb->s_fs_info;
unsigned long long free;
__le32 *uuid = (__le32 *)c->uuid;
free = ubifs_budg_get_free_space(c);
free = ubifs_get_free_space(c);
dbg_gen("free space %lld bytes (%lld blocks)",
free, free >> UBIFS_BLOCK_SHIFT);
......@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_namelen = UBIFS_MAX_NLEN;
buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
return 0;
}
......@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
/*
* Calculate how many bytes would be wasted at the end of LEB if it was
* fully filled with data nodes of maximum size. This is used in
* calculations when reporting free space.
*/
c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
return 0;
}
......@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
* internally because it does not make much sense for UBIFS, but it is
* necessary to report something for the 'statfs()' call.
*
* Subtract the LEB reserved for GC and the LEB which is reserved for
* deletions.
*
* Review 'ubifs_calc_available()' if changing this calculation.
* Subtract the LEB reserved for GC, the LEB which is reserved for
* deletions, and assume only one journal head is available.
*/
tmp64 = c->main_lebs - 2;
tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
tmp64 = ubifs_reported_space(c, tmp64);
c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
......
......@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
if (keys_cmp(c, key, &node_key) != 0)
ret = 0;
}
if (ret == 0)
if (ret == 0 && c->replaying)
dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
return ret;
......@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
}
/**
* ubifs_tnc_lookup - look up a file-system node.
* maybe_leb_gced - determine if a LEB may have been garbage collected.
* @c: UBIFS file-system description object
* @key: node key to lookup
* @node: the node is returned here
* @lnum: LEB number
* @gc_seq1: garbage collection sequence number
*
* This function look up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
* case of failure.
* This function determines if @lnum may have been garbage collected since
* sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
* %0 is returned.
*/
int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
void *node)
static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
{
int found, n, err;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr, *zt;
int gc_seq2, gced_lnum;
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
err = -ENOENT;
goto out;
} else if (found < 0) {
err = found;
goto out;
}
zt = &znode->zbranch[n];
if (is_hash_key(c, key)) {
/*
* In this case the leaf node cache gets used, so we pass the
* address of the zbranch and keep the mutex locked
*/
err = tnc_read_node_nm(c, zt, node);
goto out;
}
zbr = znode->zbranch[n];
mutex_unlock(&c->tnc_mutex);
err = ubifs_tnc_read_node(c, &zbr, node);
return err;
out:
mutex_unlock(&c->tnc_mutex);
return err;
gced_lnum = c->gced_lnum;
smp_rmb();
gc_seq2 = c->gc_seq;
/* Same seq means no GC */
if (gc_seq1 == gc_seq2)
return 0;
/* Different by more than 1 means we don't know */
if (gc_seq1 + 1 != gc_seq2)
return 1;
/*
* We have seen the sequence number has increased by 1. Now we need to
* be sure we read the right LEB number, so read it again.
*/
smp_rmb();
if (gced_lnum != c->gced_lnum)
return 1;
/* Finally we can check lnum */
if (gced_lnum == lnum)
return 1;
return 0;
}
/**
......@@ -1436,16 +1425,19 @@ int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
* @lnum: LEB number is returned here
* @offs: offset is returned here
*
* This function is the same as 'ubifs_tnc_lookup()' but it returns the node
* location also. See 'ubifs_tnc_lookup()'.
* This function look up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
* case of failure. The node location can be returned in @lnum and @offs.
*/
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
void *node, int *lnum, int *offs)
{
int found, n, err;
int found, n, err, safely = 0, gc_seq1;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr, *zt;
again:
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
......@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
goto out;
}
zt = &znode->zbranch[n];
if (lnum) {
*lnum = zt->lnum;
*offs = zt->offs;
}
if (is_hash_key(c, key)) {
/*
* In this case the leaf node cache gets used, so we pass the
* address of the zbranch and keep the mutex locked
*/
*lnum = zt->lnum;
*offs = zt->offs;
err = tnc_read_node_nm(c, zt, node);
goto out;
}
if (safely) {
err = ubifs_tnc_read_node(c, zt, node);
goto out;
}
/* Drop the TNC mutex prematurely and race with garbage collection */
zbr = znode->zbranch[n];
gc_seq1 = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
*lnum = zbr.lnum;
*offs = zbr.offs;
if (ubifs_get_wbuf(c, zbr.lnum)) {
/* We do not GC journal heads */
err = ubifs_tnc_read_node(c, &zbr, node);
return err;
}
err = ubifs_tnc_read_node(c, &zbr, node);
return err;
err = fallible_read_node(c, key, &zbr, node);
if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
/*
* The node may have been GC'ed out from under us so try again
* while keeping the TNC mutex locked.
*/
safely = 1;
goto again;
}
return 0;
out:
mutex_unlock(&c->tnc_mutex);
......@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
{
int found, n, err;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr;
dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
mutex_lock(&c->tnc_mutex);
......@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
goto out_unlock;
}
zbr = znode->zbranch[n];
mutex_unlock(&c->tnc_mutex);
err = tnc_read_node_nm(c, &zbr, node);
return err;
err = tnc_read_node_nm(c, &znode->zbranch[n], node);
out_unlock:
mutex_unlock(&c->tnc_mutex);
......
......@@ -87,7 +87,7 @@
#define UBIFS_SK_LEN 8
/* Minimum index tree fanout */
#define UBIFS_MIN_FANOUT 2
#define UBIFS_MIN_FANOUT 3
/* Maximum number of levels in UBIFS indexing B-tree */
#define UBIFS_MAX_LEVELS 512
......
......@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
* @max_inode_sz: maximum possible inode size in bytes
* @max_znode_sz: size of znode in bytes
*
* @leb_overhead: how many bytes are wasted in an LEB when it is filled with
* data nodes of maximum size - used in free space reporting
* @dead_wm: LEB dead space watermark
* @dark_wm: LEB dark space watermark
* @block_cnt: count of 4KiB blocks on the FS
......@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
* @sbuf: a buffer of LEB size used by GC and replay for scanning
* @idx_gc: list of index LEBs that have been garbage collected
* @idx_gc_cnt: number of elements on the idx_gc list
* @gc_seq: incremented for every non-index LEB garbage collected
* @gced_lnum: last non-index LEB that was garbage collected
*
* @infos_list: links all 'ubifs_info' objects
* @umount_mutex: serializes shrinker and un-mount
......@@ -1224,6 +1229,8 @@ struct ubifs_info {
int max_idx_node_sz;
long long max_inode_sz;
int max_znode_sz;
int leb_overhead;
int dead_wm;
int dark_wm;
int block_cnt;
......@@ -1257,6 +1264,8 @@ struct ubifs_info {
void *sbuf;
struct list_head idx_gc;
int idx_gc_cnt;
volatile int gc_seq;
volatile int gced_lnum;
struct list_head infos_list;
struct mutex umount_mutex;
......@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
long long ubifs_budg_get_free_space(struct ubifs_info *c);
long long ubifs_get_free_space(struct ubifs_info *c);
int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
void ubifs_convert_page_budget(struct ubifs_info *c);
long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/* find.c */
......@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
/* tnc.c */
int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode **zn, int *n);
int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
void *node);
int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
void *node, const struct qstr *nm);
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment