Commit 377ad0c2 authored by Linus Torvalds

Merge tag 'erofs-for-5.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "Updates with a XArray adaptation, several fixes for shrinker and
  corrupted images are ready for this cycle.

  All commits have been stress tested with no noticeable smoke and have
  been in linux-next as well.

  Summary:

   - Convert radix tree usage to XArray

   - Fix shrink scan count on multiple filesystem instances

   - Better handling for specific corrupted images

   - Update my email address in MAINTAINERS"

* tag 'erofs-for-5.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  MAINTAINERS: erofs: update my email address
  erofs: handle corrupted images whose decompressed size less than it'd be
  erofs: use LZ4_decompress_safe() for full decoding
  erofs: correct the remaining shrink objects
  erofs: convert workstn to XArray
parents 481ed297 20741a6e
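
Before the diff itself, here is a condensed user-space sketch of the lookup-or-insert pattern that the XArray conversion (the first summary item above) adopts in erofs_insert_workgroup(): take a reference on a candidate object before publishing it, and if another thread has already inserted an object at the same index, reuse that one instead. This is only an illustrative analogy built on C11 atomics, not the kernel code; the names slots[], insert_workgroup() and the fixed-size table are assumptions for the example, and the real patch additionally retries via erofs_workgroup_get()/cond_resched() when the in-tree entry is about to be freed.

/*
 * Illustrative user-space analogy (not kernel code) of the
 * "insert or reuse the existing entry" pattern that the new
 * erofs_insert_workgroup() builds on top of __xa_cmpxchg().
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct workgroup {
	unsigned long index;
	atomic_int refcount;
};

#define NR_SLOTS 64
/* stands in for the managed_pslots XArray (hypothetical fixed table) */
static _Atomic(struct workgroup *) slots[NR_SLOTS];

static struct workgroup *insert_workgroup(struct workgroup *grp)
{
	struct workgroup *expected = NULL;

	/* bump the refcount before the object becomes visible to others */
	atomic_fetch_add(&grp->refcount, 1);

	if (atomic_compare_exchange_strong(&slots[grp->index % NR_SLOTS],
					   &expected, grp))
		return grp;	/* our candidate is now published */

	/* lost the race: reuse the in-table object, drop our candidate */
	atomic_fetch_add(&expected->refcount, 1);
	free(grp);
	return expected;
}

int main(void)
{
	struct workgroup *a = calloc(1, sizeof(*a));
	struct workgroup *b = calloc(1, sizeof(*b));

	a->index = b->index = 42;	/* same physical cluster index */

	struct workgroup *first = insert_workgroup(a);
	struct workgroup *second = insert_workgroup(b);

	printf("same object reused: %s\n", first == second ? "yes" : "no");
	return 0;
}

As in the kernel patch, the essential point is that the reference is taken before publication, so a concurrent shrinker can never observe a zero-refcount object.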
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6248,7 +6248,7 @@ F:	drivers/video/fbdev/s1d13xxxfb.c
 F:	include/video/s1d13xxxfb.h
 
 EROFS FILE SYSTEM
-M:	Gao Xiang <gaoxiang25@huawei.com>
+M:	Gao Xiang <xiang@kernel.org>
 M:	Chao Yu <yuchao0@huawei.com>
 L:	linux-erofs@lists.ozlabs.org
 S:	Maintained
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -157,17 +157,27 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
 		}
 	}
 
-	ret = LZ4_decompress_safe_partial(src + inputmargin, out,
-					  inlen, rq->outputsize,
-					  rq->outputsize);
-	if (ret < 0) {
-		erofs_err(rq->sb, "failed to decompress, in[%u, %u] out[%u]",
-			  inlen, inputmargin, rq->outputsize);
+	/* legacy format could compress extra data in a pcluster. */
+	if (rq->partial_decoding || !support_0padding)
+		ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+						  inlen, rq->outputsize,
+						  rq->outputsize);
+	else
+		ret = LZ4_decompress_safe(src + inputmargin, out,
+					  inlen, rq->outputsize);
+
+	if (ret != rq->outputsize) {
+		erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+			  ret, inlen, inputmargin, rq->outputsize);
+
 		WARN_ON(1);
 		print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
 			       16, 1, src + inputmargin, inlen, true);
 		print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
 			       16, 1, out, rq->outputsize, true);
+
+		if (ret >= 0)
+			memset(out + ret, 0, rq->outputsize - ret);
+
 		ret = -EIO;
 	}
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -52,8 +52,8 @@ struct erofs_sb_info {
 	struct list_head list;
 	struct mutex umount_mutex;
 
-	/* the dedicated workstation for compression */
-	struct radix_tree_root workstn_tree;
+	/* managed XArray arranged in physical block number */
+	struct xarray managed_pslots;
 
 	/* threshold for decompression synchronously */
 	unsigned int max_sync_decompress_pages;
@@ -402,8 +402,8 @@ static inline void *erofs_get_pcpubuf(unsigned int pagenr)
 int erofs_workgroup_put(struct erofs_workgroup *grp);
 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 					     pgoff_t index);
-int erofs_register_workgroup(struct super_block *sb,
-			     struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+					       struct erofs_workgroup *grp);
 void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
 void erofs_shrinker_register(struct super_block *sb);
 void erofs_shrinker_unregister(struct super_block *sb);
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -425,7 +425,7 @@ static int erofs_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_flags &= ~SB_POSIXACL;
 
 #ifdef CONFIG_EROFS_FS_ZIP
-	INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+	xa_init(&sbi->managed_pslots);
 #endif
 
 	/* get the root inode */
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -37,9 +37,6 @@ void *erofs_get_pcpubuf(unsigned int pagenr)
 /* global shrink count (for all mounted EROFS instances) */
 static atomic_long_t erofs_global_shrink_cnt;
 
-#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
-
 static int erofs_workgroup_get(struct erofs_workgroup *grp)
 {
 	int o;
@@ -66,7 +63,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 repeat:
 	rcu_read_lock();
-	grp = radix_tree_lookup(&sbi->workstn_tree, index);
+	grp = xa_load(&sbi->managed_pslots, index);
 	if (grp) {
 		if (erofs_workgroup_get(grp)) {
 			/* prefer to relax rcu read side */
@@ -80,43 +77,37 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
 	return grp;
 }
 
-int erofs_register_workgroup(struct super_block *sb,
-			     struct erofs_workgroup *grp)
+struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
+					       struct erofs_workgroup *grp)
 {
-	struct erofs_sb_info *sbi;
-	int err;
-
-	/* grp shouldn't be broken or used before */
-	if (atomic_read(&grp->refcount) != 1) {
-		DBG_BUGON(1);
-		return -EINVAL;
-	}
-
-	err = radix_tree_preload(GFP_NOFS);
-	if (err)
-		return err;
-
-	sbi = EROFS_SB(sb);
-	xa_lock(&sbi->workstn_tree);
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	struct erofs_workgroup *pre;
 
 	/*
-	 * Bump up reference count before making this workgroup
-	 * visible to other users in order to avoid potential UAF
-	 * without serialized by workstn_lock.
+	 * Bump up a reference count before making this visible
+	 * to others for the XArray in order to avoid potential
+	 * UAF without serialized by xa_lock.
 	 */
-	__erofs_workgroup_get(grp);
+	atomic_inc(&grp->refcount);
 
-	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
-	if (err)
-		/*
-		 * it's safe to decrease since the workgroup isn't visible
-		 * and refcount >= 2 (cannot be freezed).
-		 */
-		__erofs_workgroup_put(grp);
-
-	xa_unlock(&sbi->workstn_tree);
-	radix_tree_preload_end();
-	return err;
+repeat:
+	xa_lock(&sbi->managed_pslots);
+	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
+			   NULL, grp, GFP_NOFS);
+	if (pre) {
+		if (xa_is_err(pre)) {
+			pre = ERR_PTR(xa_err(pre));
+		} else if (erofs_workgroup_get(pre)) {
+			/* try to legitimize the current in-tree one */
+			xa_unlock(&sbi->managed_pslots);
+			cond_resched();
+			goto repeat;
+		}
+		atomic_dec(&grp->refcount);
+		grp = pre;
+	}
+	xa_unlock(&sbi->managed_pslots);
+	return grp;
 }
 
 static void __erofs_workgroup_free(struct erofs_workgroup *grp)
@@ -155,7 +146,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 	/*
 	 * Note that all cached pages should be unattached
-	 * before deleted from the radix tree. Otherwise some
+	 * before deleted from the XArray. Otherwise some
 	 * cached pages could be still attached to the orphan
 	 * old workgroup when the new one is available in the tree.
 	 */
@@ -169,7 +160,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 	 * however in order to avoid some race conditions, add a
 	 * DBG_BUGON to observe this in advance.
 	 */
-	DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);
+	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
 
 	/*
 	 * If managed cache is on, last refcount should indicate
@@ -182,22 +173,11 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 					      unsigned long nr_shrink)
 {
-	pgoff_t first_index = 0;
-	void *batch[PAGEVEC_SIZE];
+	struct erofs_workgroup *grp;
 	unsigned int freed = 0;
+	unsigned long index;
 
-	int i, found;
-repeat:
-	xa_lock(&sbi->workstn_tree);
-
-	found = radix_tree_gang_lookup(&sbi->workstn_tree,
-				       batch, first_index, PAGEVEC_SIZE);
-
-	for (i = 0; i < found; ++i) {
-		struct erofs_workgroup *grp = batch[i];
-
-		first_index = grp->index + 1;
-
+	xa_for_each(&sbi->managed_pslots, index, grp) {
 		/* try to shrink each valid workgroup */
 		if (!erofs_try_to_release_workgroup(sbi, grp))
 			continue;
@@ -206,10 +186,6 @@ static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
 		if (!--nr_shrink)
 			break;
 	}
-	xa_unlock(&sbi->workstn_tree);
-
-	if (i && nr_shrink)
-		goto repeat;
-
 	return freed;
 }
@@ -286,7 +262,7 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
 		spin_unlock(&erofs_sb_list_lock);
 		sbi->shrinker_run_no = run_no;
 
-		freed += erofs_shrink_workstation(sbi, nr);
+		freed += erofs_shrink_workstation(sbi, nr - freed);
 
 		spin_lock(&erofs_sb_list_lock);
 		/* Get the next list element before we move this one */
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -67,16 +67,6 @@ static void z_erofs_pcluster_init_once(void *ptr)
 		pcl->compressed_pages[i] = NULL;
 }
 
-static void z_erofs_pcluster_init_always(struct z_erofs_pcluster *pcl)
-{
-	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-
-	atomic_set(&pcl->obj.refcount, 1);
-
-	DBG_BUGON(cl->nr_pages);
-	DBG_BUGON(cl->vcnt);
-}
-
 int __init z_erofs_init_zip_subsystem(void)
 {
 	pcluster_cachep = kmem_cache_create("erofs_compress",
@@ -341,26 +331,19 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
 				     struct inode *inode,
 				     struct erofs_map_blocks *map)
 {
-	struct erofs_workgroup *grp;
-	struct z_erofs_pcluster *pcl;
+	struct z_erofs_pcluster *pcl = clt->pcl;
 	struct z_erofs_collection *cl;
 	unsigned int length;
 
-	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
-	if (!grp)
-		return -ENOENT;
-
-	pcl = container_of(grp, struct z_erofs_pcluster, obj);
+	/* to avoid unexpected loop formed by corrupted images */
 	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
 		DBG_BUGON(1);
-		erofs_workgroup_put(grp);
 		return -EFSCORRUPTED;
 	}
 
 	cl = z_erofs_primarycollection(pcl);
 	if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
 		DBG_BUGON(1);
-		erofs_workgroup_put(grp);
 		return -EFSCORRUPTED;
 	}
@@ -368,7 +351,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
 	if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
 		if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
 			DBG_BUGON(1);
-			erofs_workgroup_put(grp);
 			return -EFSCORRUPTED;
 		}
 	} else {
@@ -391,7 +373,6 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt,
 	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
 	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
 		clt->tailpcl = NULL;
-	clt->pcl = pcl;
 	clt->cl = cl;
 	return 0;
 }
@@ -402,6 +383,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 {
 	struct z_erofs_pcluster *pcl;
 	struct z_erofs_collection *cl;
+	struct erofs_workgroup *grp;
 	int err;
 
 	/* no available workgroup, let's allocate one */
@@ -409,7 +391,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 	if (!pcl)
 		return -ENOMEM;
 
-	z_erofs_pcluster_init_always(pcl);
+	atomic_set(&pcl->obj.refcount, 1);
 	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
 
 	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
@@ -429,19 +411,29 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 	clt->mode = COLLECT_PRIMARY_FOLLOWED;
 
 	cl = z_erofs_primarycollection(pcl);
+
+	/* must be cleaned before freeing to slab */
+	DBG_BUGON(cl->nr_pages);
+	DBG_BUGON(cl->vcnt);
+
 	cl->pageofs = map->m_la & ~PAGE_MASK;
 
 	/*
 	 * lock all primary followed works before visible to others
 	 * and mutex_trylock *never* fails for a new pcluster.
 	 */
-	mutex_trylock(&cl->lock);
+	DBG_BUGON(!mutex_trylock(&cl->lock));
 
-	err = erofs_register_workgroup(inode->i_sb, &pcl->obj);
-	if (err) {
-		mutex_unlock(&cl->lock);
-		kmem_cache_free(pcluster_cachep, pcl);
-		return -EAGAIN;
+	grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+	if (IS_ERR(grp)) {
+		err = PTR_ERR(grp);
+		goto err_out;
+	}
+
+	if (grp != &pcl->obj) {
+		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+		err = -EEXIST;
+		goto err_out;
 	}
 
 	/* used to check tail merging loop due to corrupted images */
 	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
@@ -450,12 +442,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
 	clt->pcl = pcl;
 	clt->cl = cl;
 	return 0;
+
+err_out:
+	mutex_unlock(&cl->lock);
+	kmem_cache_free(pcluster_cachep, pcl);
+	return err;
 }
 
 static int z_erofs_collector_begin(struct z_erofs_collector *clt,
 				   struct inode *inode,
 				   struct erofs_map_blocks *map)
 {
+	struct erofs_workgroup *grp;
 	int ret;
 
 	DBG_BUGON(clt->cl);
@@ -469,21 +467,25 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt,
 		return -EINVAL;
 	}
 
-repeat:
-	ret = z_erofs_lookup_collection(clt, inode, map);
-	if (ret == -ENOENT) {
+	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
+	if (grp) {
+		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+	} else {
 		ret = z_erofs_register_collection(clt, inode, map);
 
-		/* someone registered at the same time, give another try */
-		if (ret == -EAGAIN) {
-			cond_resched();
-			goto repeat;
-		}
+		if (!ret)
+			goto out;
+		if (ret != -EEXIST)
+			return ret;
 	}
 
-	if (ret)
+	ret = z_erofs_lookup_collection(clt, inode, map);
+	if (ret) {
+		erofs_workgroup_put(&clt->pcl->obj);
 		return ret;
+	}
 
+out:
 	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
 				  clt->cl->pagevec, clt->cl->vcnt);