Commit 098c5dd9 authored by Linus Torvalds

Merge tag 'erofs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "No outstanding new feature for this cycle.

  Most of these commits are decompression cleanups which are part of the
  ongoing development for subpage/folio compression support as well as
  xattr cleanups for the upcoming xattr bloom filter optimization [1].

  In addition, there are bugfixes to address some corner cases of
  compressed images due to global data de-duplication and arm64 16k
  pages.

  Summary:

   - Fix rare I/O hang on deduplicated compressed images due to loop
     hooked chains

   - Fix compact compression layout of 16k blocks on arm64 devices

   - Fix atomic context detection of async decompression

   - Decompression/Xattr code cleanups"

Link: https://lore.kernel.org/r/20230621083209.116024-1-jefflexu@linux.alibaba.com [1]

* tag 'erofs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: clean up zmap.c
  erofs: remove unnecessary goto
  erofs: Fix detection of atomic context
  erofs: use separate xattr parsers for listxattr/getxattr
  erofs: unify inline/shared xattr iterators for listxattr/getxattr
  erofs: make the size of read data stored in buffer_ofs
  erofs: unify xattr_iter structures
  erofs: use absolute position in xattr iterator
  erofs: fix compact 4B support for 16k block size
  erofs: convert erofs_read_metabuf() to erofs_bread() for xattr
  erofs: use poison pointer to replace the hard-coded address
  erofs: use struct lockref to replace handcrafted approach
  erofs: adapt managed inode operations into folios
  erofs: kill hooked chains to avoid loops on deduplicated compressed images
  erofs: avoid on-stack pagepool directly passed by arguments
  erofs: allocate extra bvec pages directly instead of retrying
  erofs: clean up z_erofs_pcluster_readmore()
  erofs: remove the member readahead from struct z_erofs_decompress_frontend
  erofs: fold in z_erofs_decompress()
parents 74774e24 8241fdd3
......@@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
extern const struct z_erofs_decompressor erofs_decompressors[];
/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
......
......@@ -363,7 +363,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
return 0;
}
static struct z_erofs_decompressor decompressors[] = {
const struct z_erofs_decompressor erofs_decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_transform_plain,
.name = "shifted"
......@@ -383,9 +383,3 @@ static struct z_erofs_decompressor decompressors[] = {
},
#endif
};
/*
 * Run the decompression request @rq through the handler that the
 * decompressors[] table holds at index rq->alg, forwarding @pagepool
 * unchanged, and return that handler's result.
 */
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
return decompressors[rq->alg].decompress(rq, pagepool);
}
......@@ -208,46 +208,12 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
/* the workgroup index in the workstation */
pgoff_t index;
/* overall workgroup reference count */
atomic_t refcount;
struct lockref lockref;
};
/*
 * Try to freeze @grp by atomically swapping its refcount from @val to
 * EROFS_LOCKED_MAGIC.
 *
 * Returns true when the swap took place; preemption is then still disabled
 * on return (erofs_workgroup_unfreeze() is what re-enables it). Returns
 * false, with preemption re-enabled, when the refcount was not @val.
 */
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
/* cmpxchg hands back the old value; anything other than @val means failure */
if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
preempt_enable();
return false;
}
return true;
}
/*
 * Unfreeze @grp: issue a full memory barrier, store @orig_val into its
 * refcount, then re-enable preemption (balancing the preempt_disable() done
 * by erofs_workgroup_try_to_freeze()).
 */
static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
/*
 * other observers should notice all modifications
 * in the freezing period.
 */
smp_mb();
atomic_set(&grp->refcount, orig_val);
preempt_enable();
}
/*
 * Wait, through atomic_cond_read_relaxed(), until @grp's refcount is
 * something other than EROFS_LOCKED_MAGIC, and return what that macro
 * evaluates to (presumably the refcount value it last read -- confirm
 * against the atomic_cond_read_relaxed() definition).
 */
static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
enum erofs_kmap_type {
EROFS_NO_KMAP, /* don't map the buffer */
EROFS_KMAP, /* use kmap_local_page() to map the buffer */
......@@ -486,7 +452,7 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
void erofs_release_pages(struct page **pagepool);
#ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp);
void erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
......@@ -500,7 +466,6 @@ int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
......@@ -511,6 +476,7 @@ void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
void __init erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
int erofs_init_managed_cache(struct super_block *sb);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
......@@ -530,6 +496,7 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
static inline void erofs_pcpubuf_init(void) {}
static inline void erofs_pcpubuf_exit(void) {}
static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
......
......@@ -599,68 +599,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return 0;
}
#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;
/*
 * release_folio hook of managed_cache_aops.
 *
 * A folio that carries private data is handed to
 * erofs_try_to_free_cached_page() and that call's result (converted to bool)
 * is returned; a folio without private data is always reported as
 * releasable. @gfp is not used.
 */
static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
	struct address_space *const mapping = folio->mapping;

	/* debug-only checks: folio must be locked and belong to this aops */
	DBG_BUGON(!folio_test_locked(folio));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (!folio_test_private(folio))
		return true;

	return erofs_try_to_free_cached_page(&folio->page);
}
/*
 * invalidate_folio hook of managed_cache_aops.
 *
 * Only inode eviction is expected to get here. Decompression requests may
 * still be in flight at that point, so rather than adding extra locking
 * (which seems unnecessary) keep retrying the release and reschedule between
 * attempts. Nothing is done unless the range covers the whole folio.
 */
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
						 size_t offset, size_t length)
{
	const size_t end = length + offset;

	DBG_BUGON(!folio_test_locked(folio));
	/* Check for potential overflow in debug mode */
	DBG_BUGON(end > folio_size(folio) || end < length);

	/* partial invalidation: leave the folio alone */
	if (offset != 0 || end != folio_size(folio))
		return;

	while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
		cond_resched();
}
/*
 * Address-space operations for the managed cache: only folio release and
 * folio invalidation are provided.
 */
static const struct address_space_operations managed_cache_aops = {
	.invalidate_folio	= erofs_managed_cache_invalidate_folio,
	.release_folio		= erofs_managed_cache_release_folio,
};
/*
 * Allocate the inode that backs the managed cache of @sb and record it in
 * the superblock info (sbi->managed_cache).
 *
 * The new inode gets a link count of 1, a size of OFFSET_MAX,
 * managed_cache_aops as its mapping operations and GFP_NOFS as its mapping
 * gfp mask.
 *
 * Returns 0 on success or -ENOMEM if new_inode() fails.
 */
static int erofs_init_managed_cache(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct inode *const cache_inode = new_inode(sb);

	if (!cache_inode)
		return -ENOMEM;

	/* inode attributes */
	set_nlink(cache_inode, 1);
	cache_inode->i_size = OFFSET_MAX;

	/* mapping setup */
	cache_inode->i_mapping->a_ops = &managed_cache_aops;
	mapping_set_gfp_mask(cache_inode->i_mapping, GFP_NOFS);

	sbi->managed_cache = cache_inode;
	return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif
static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
......@@ -1016,10 +954,8 @@ static int __init erofs_module_init(void)
sizeof(struct erofs_inode), 0,
SLAB_RECLAIM_ACCOUNT,
erofs_inode_init_once);
if (!erofs_inode_cachep) {
err = -ENOMEM;
goto icache_err;
}
if (!erofs_inode_cachep)
return -ENOMEM;
err = erofs_init_shrinker();
if (err)
......@@ -1054,7 +990,6 @@ static int __init erofs_module_init(void)
erofs_exit_shrinker();
shrinker_err:
kmem_cache_destroy(erofs_inode_cachep);
icache_err:
return err;
}
......
......@@ -4,7 +4,6 @@
* https://www.huawei.com/
*/
#include "internal.h"
#include <linux/pagevec.h>
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
......@@ -33,22 +32,21 @@ void erofs_release_pages(struct page **pagepool)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
static int erofs_workgroup_get(struct erofs_workgroup *grp)
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
int o;
repeat:
o = erofs_wait_on_workgroup_freezed(grp);
if (o <= 0)
return -1;
if (lockref_get_not_zero(&grp->lockref))
return true;
if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
goto repeat;
spin_lock(&grp->lockref.lock);
if (__lockref_is_dead(&grp->lockref)) {
spin_unlock(&grp->lockref.lock);
return false;
}
/* decrease refcount paired by erofs_workgroup_put */
if (o == 1)
if (!grp->lockref.count++)
atomic_long_dec(&erofs_global_shrink_cnt);
return 0;
spin_unlock(&grp->lockref.lock);
return true;
}
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
......@@ -61,7 +59,7 @@ struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
rcu_read_lock();
grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
if (erofs_workgroup_get(grp)) {
if (!erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
......@@ -80,11 +78,10 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
struct erofs_workgroup *pre;
/*
* Bump up a reference count before making this visible
* to others for the XArray in order to avoid potential
* UAF without serialized by xa_lock.
* Bump up before making this visible to others for the XArray in order
* to avoid potential UAF without serialized by xa_lock.
*/
atomic_inc(&grp->refcount);
lockref_get(&grp->lockref);
repeat:
xa_lock(&sbi->managed_pslots);
......@@ -93,13 +90,13 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
} else if (erofs_workgroup_get(pre)) {
} else if (!erofs_workgroup_get(pre)) {
/* try to legitimize the current in-tree one */
xa_unlock(&sbi->managed_pslots);
cond_resched();
goto repeat;
}
atomic_dec(&grp->refcount);
lockref_put_return(&grp->lockref);
grp = pre;
}
xa_unlock(&sbi->managed_pslots);
......@@ -112,38 +109,34 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
erofs_workgroup_free_rcu(grp);
}
int erofs_workgroup_put(struct erofs_workgroup *grp)
void erofs_workgroup_put(struct erofs_workgroup *grp)
{
int count = atomic_dec_return(&grp->refcount);
if (lockref_put_or_lock(&grp->lockref))
return;
if (count == 1)
DBG_BUGON(__lockref_is_dead(&grp->lockref));
if (grp->lockref.count == 1)
atomic_long_inc(&erofs_global_shrink_cnt);
else if (!count)
__erofs_workgroup_free(grp);
return count;
--grp->lockref.count;
spin_unlock(&grp->lockref.lock);
}
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
/*
* If managed cache is on, refcount of workgroups
* themselves could be < 0 (freezed). In other words,
* there is no guarantee that all refcounts > 0.
*/
if (!erofs_workgroup_try_to_freeze(grp, 1))
return false;
int free = false;
spin_lock(&grp->lockref.lock);
if (grp->lockref.count)
goto out;
/*
* Note that all cached pages should be unattached
* before deleted from the XArray. Otherwise some
* cached pages could be still attached to the orphan
* old workgroup when the new one is available in the tree.
* Note that all cached pages should be detached before deleted from
* the XArray. Otherwise some cached pages could be still attached to
* the orphan old workgroup when the new one is available in the tree.
*/
if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
erofs_workgroup_unfreeze(grp, 1);
return false;
}
if (erofs_try_to_free_all_cached_pages(sbi, grp))
goto out;
/*
* It's impossible to fail after the workgroup is freezed,
......@@ -152,10 +145,13 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
*/
DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
/* last refcount should be connected with its managed pslot. */
erofs_workgroup_unfreeze(grp, 0);
lockref_mark_dead(&grp->lockref);
free = true;
out:
spin_unlock(&grp->lockref.lock);
if (free)
__erofs_workgroup_free(grp);
return true;
return free;
}
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
......
This diff is collapsed.
This diff is collapsed.
......@@ -22,7 +22,7 @@ struct z_erofs_maprecorder {
bool partialref;
};
static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
unsigned long lcn)
{
struct inode *const inode = m->inode;
......@@ -129,7 +129,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
u8 *in, type;
bool big_pcluster;
if (1 << amortizedshift == 4)
if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits == 12)
vcnt = 16;
......@@ -226,12 +226,11 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
return 0;
}
static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
unsigned long lcn, bool lookahead)
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
unsigned int totalidx = erofs_iblks(inode);
......@@ -239,9 +238,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int amortizedshift;
erofs_off_t pos;
if (lclusterbits != 12)
return -EOPNOTSUPP;
if (lcn >= totalidx)
return -EINVAL;
......@@ -281,23 +277,23 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
const unsigned int datamode = EROFS_I(m->inode)->datalayout;
if (datamode == EROFS_INODE_COMPRESSED_FULL)
return legacy_load_cluster_from_disk(m, lcn);
if (datamode == EROFS_INODE_COMPRESSED_COMPACT)
return compacted_load_cluster_from_disk(m, lcn, lookahead);
switch (EROFS_I(m->inode)->datalayout) {
case EROFS_INODE_COMPRESSED_FULL:
return z_erofs_load_full_lcluster(m, lcn);
case EROFS_INODE_COMPRESSED_COMPACT:
return z_erofs_load_compact_lcluster(m, lcn, lookahead);
default:
return -EINVAL;
}
}
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned int lookback_distance)
{
struct super_block *sb = m->inode->i_sb;
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
......@@ -305,21 +301,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned long lcn = m->lcn - lookback_distance;
int err;
/* load extent head logical cluster if needed */
err = z_erofs_load_cluster_from_disk(m, lcn, false);
err = z_erofs_load_lcluster_from_disk(m, lcn, false);
if (err)
return err;
switch (m->type) {
case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (!m->delta[0]) {
erofs_err(m->inode->i_sb,
"invalid lookback distance 0 @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
lookback_distance = m->delta[0];
if (!lookback_distance)
goto err_bogus;
continue;
case Z_EROFS_LCLUSTER_TYPE_PLAIN:
case Z_EROFS_LCLUSTER_TYPE_HEAD1:
......@@ -328,16 +318,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
return 0;
default:
erofs_err(m->inode->i_sb,
"unknown type %u @ lcn %lu of nid %llu",
erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
m->type, lcn, vi->nid);
DBG_BUGON(1);
return -EOPNOTSUPP;
}
}
erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
vi->nid);
err_bogus:
erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
lookback_distance, m->lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
......@@ -369,7 +358,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
if (m->compressedblks)
goto out;
err = z_erofs_load_cluster_from_disk(m, lcn, false);
err = z_erofs_load_lcluster_from_disk(m, lcn, false);
if (err)
return err;
......@@ -401,9 +390,8 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
break;
fallthrough;
default:
erofs_err(m->inode->i_sb,
"cannot found CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn,
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
......@@ -411,9 +399,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
"bogus CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
......@@ -434,7 +420,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0;
}
err = z_erofs_load_cluster_from_disk(m, lcn, true);
err = z_erofs_load_lcluster_from_disk(m, lcn, true);
if (err)
return err;
......@@ -481,7 +467,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1);
err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
if (err)
goto unmap_out;
......@@ -539,8 +525,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
/* for non-compact indexes, fragmentoff is 64 bits */
if (fragment &&
vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
vi->z_fragmentoff |= (u64)m.pblk << 32;
}
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment