Commit e88745dc authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'erofs-for-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs

Pull erofs updates from Gao Xiang:
 "First of all, we'd like to add Yue Hu and Jeffle Xu as two new
  reviewers. Thank them for spending time working on EROFS!

  There is no major feature outstanding in this cycle, mainly a patchset
  I worked on to prepare for rolling hash deduplication and folios for
  compressed data as the next big features. It kills the unneeded
  PG_error flag dependency as well.

  Apart from that, there are bugfixes and cleanups as always. Details
  are listed below:

   - Add Yue Hu and Jeffle Xu as reviewers

   - Add the missing wake_up when updating lzma streams

   - Avoid consecutive detection for Highmem memory

   - Prepare for multi-reference pclusters and get rid of PG_error

   - Fix ctx->pos update for NFS export

   - minor cleanups"

* tag 'erofs-for-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: (23 commits)
  erofs: update ctx->pos for every emitted dirent
  erofs: get rid of the leftover PAGE_SIZE in dir.c
  erofs: get rid of erofs_prepare_dio() helper
  erofs: introduce multi-reference pclusters (fully-referenced)
  erofs: record the longest decompressed size in this round
  erofs: introduce z_erofs_do_decompressed_bvec()
  erofs: try to leave (de)compressed_pages on stack if possible
  erofs: introduce struct z_erofs_decompress_backend
  erofs: get rid of `z_pagemap_global'
  erofs: clean up `enum z_erofs_collectmode'
  erofs: get rid of `enum z_erofs_page_type'
  erofs: rework online page handling
  erofs: switch compressed_pages[] to bufvec
  erofs: introduce `z_erofs_parse_in_bvecs'
  erofs: drop the old pagevec approach
  erofs: introduce bufvec to store decompressed buffers
  erofs: introduce `z_erofs_parse_out_bvecs()'
  erofs: clean up z_erofs_collector_begin()
  erofs: get rid of unneeded `inode', `map' and `sb'
  erofs: avoid consecutive detection for Highmem memory
  ...
parents bec14d79 ecce9212
......@@ -7485,6 +7485,8 @@ F: include/video/s1d13xxxfb.h
EROFS FILE SYSTEM
M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
R: Yue Hu <huyue2@coolpad.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
......
......@@ -17,7 +17,7 @@ struct z_erofs_decompress_req {
/* indicate the algorithm will be used for decompression */
unsigned int alg;
bool inplace_io, partial_decoding;
bool inplace_io, partial_decoding, fillgaps;
};
struct z_erofs_decompressor {
......
......@@ -366,42 +366,33 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return iomap_bmap(mapping, block, &erofs_iomap_ops);
}
static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to)
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
loff_t align = iocb->ki_pos | iov_iter_count(to) |
iov_iter_alignment(to);
struct block_device *bdev = inode->i_sb->s_bdev;
unsigned int blksize_mask;
if (bdev)
blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1;
else
blksize_mask = (1 << inode->i_blkbits) - 1;
if (align & blksize_mask)
return -EINVAL;
return 0;
}
static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
/* no need taking (shared) inode lock since it's a ro filesystem */
if (!iov_iter_count(to))
return 0;
#ifdef CONFIG_FS_DAX
if (IS_DAX(iocb->ki_filp->f_mapping->host))
if (IS_DAX(inode))
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
if (iocb->ki_flags & IOCB_DIRECT) {
int err = erofs_prepare_dio(iocb, to);
struct block_device *bdev = inode->i_sb->s_bdev;
unsigned int blksize_mask;
if (bdev)
blksize_mask = bdev_logical_block_size(bdev) - 1;
else
blksize_mask = (1 << inode->i_blkbits) - 1;
if ((iocb->ki_pos | iov_iter_count(to) |
iov_iter_alignment(to)) & blksize_mask)
return -EINVAL;
if (!err)
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
NULL, 0, NULL, 0);
if (err < 0)
return err;
}
return filemap_read(iocb, to, 0);
}
......
......@@ -83,7 +83,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
j = 0;
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
if (!rq->fillgaps && test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
......@@ -91,14 +91,18 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
if (page) {
__clear_bit(j, bounced);
if (kaddr) {
if (kaddr + PAGE_SIZE == page_address(page))
kaddr += PAGE_SIZE;
else
kaddr = NULL;
} else if (!i) {
if (!PageHighMem(page)) {
if (!i) {
kaddr = page_address(page);
continue;
}
if (kaddr &&
kaddr + PAGE_SIZE == page_address(page)) {
kaddr += PAGE_SIZE;
continue;
}
}
kaddr = NULL;
continue;
}
kaddr = NULL;
......
......@@ -143,6 +143,7 @@ int z_erofs_load_lzma_config(struct super_block *sb,
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);
wake_up_all(&z_erofs_lzma_wq);
z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);
......
......@@ -22,10 +22,9 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name,
}
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
void *dentry_blk, unsigned int *ofs,
void *dentry_blk, struct erofs_dirent *de,
unsigned int nameoff, unsigned int maxsize)
{
struct erofs_dirent *de = dentry_blk + *ofs;
const struct erofs_dirent *end = dentry_blk + nameoff;
while (de < end) {
......@@ -59,9 +58,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
/* stopped by some reason */
return 1;
++de;
*ofs += sizeof(struct erofs_dirent);
ctx->pos += sizeof(struct erofs_dirent);
}
*ofs = maxsize;
return 0;
}
......@@ -90,33 +88,33 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
nameoff = le16_to_cpu(de->nameoff);
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
nameoff >= EROFS_BLKSIZ) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
goto skip_this;
break;
}
maxsize = min_t(unsigned int,
dirsize - ctx->pos + ofs, PAGE_SIZE);
dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
/* search dirents at the arbitrary position */
if (initial) {
initial = false;
ofs = roundup(ofs, sizeof(struct erofs_dirent));
ctx->pos = blknr_to_addr(i) + ofs;
if (ofs >= nameoff)
goto skip_this;
}
err = erofs_fill_dentries(dir, ctx, de, &ofs,
err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs,
nameoff, maxsize);
skip_this:
ctx->pos = blknr_to_addr(i) + ofs;
if (err)
break;
skip_this:
ctx->pos = blknr_to_addr(i) + maxsize;
++i;
ofs = 0;
}
......
......@@ -2,6 +2,7 @@
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
* Copyright (C) 2022 Alibaba Cloud
*/
#include "zdata.h"
#include "compress.h"
......@@ -26,6 +27,82 @@ static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
};
struct z_erofs_bvec_iter {
struct page *bvpage;
struct z_erofs_bvset *bvset;
unsigned int nr, cur;
};
static struct page *z_erofs_bvec_iter_end(struct z_erofs_bvec_iter *iter)
{
if (iter->bvpage)
kunmap_local(iter->bvset);
return iter->bvpage;
}
static struct page *z_erofs_bvset_flip(struct z_erofs_bvec_iter *iter)
{
unsigned long base = (unsigned long)((struct z_erofs_bvset *)0)->bvec;
/* have to access nextpage in advance, otherwise it will be unmapped */
struct page *nextpage = iter->bvset->nextpage;
struct page *oldpage;
DBG_BUGON(!nextpage);
oldpage = z_erofs_bvec_iter_end(iter);
iter->bvpage = nextpage;
iter->bvset = kmap_local_page(nextpage);
iter->nr = (PAGE_SIZE - base) / sizeof(struct z_erofs_bvec);
iter->cur = 0;
return oldpage;
}
static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter,
struct z_erofs_bvset_inline *bvset,
unsigned int bootstrap_nr,
unsigned int cur)
{
*iter = (struct z_erofs_bvec_iter) {
.nr = bootstrap_nr,
.bvset = (struct z_erofs_bvset *)bvset,
};
while (cur > iter->nr) {
cur -= iter->nr;
z_erofs_bvset_flip(iter);
}
iter->cur = cur;
}
static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
struct z_erofs_bvec *bvec,
struct page **candidate_bvpage)
{
if (iter->cur == iter->nr) {
if (!*candidate_bvpage)
return -EAGAIN;
DBG_BUGON(iter->bvset->nextpage);
iter->bvset->nextpage = *candidate_bvpage;
z_erofs_bvset_flip(iter);
iter->bvset->nextpage = NULL;
*candidate_bvpage = NULL;
}
iter->bvset->bvec[iter->cur++] = *bvec;
return 0;
}
static void z_erofs_bvec_dequeue(struct z_erofs_bvec_iter *iter,
struct z_erofs_bvec *bvec,
struct page **old_bvpage)
{
if (iter->cur == iter->nr)
*old_bvpage = z_erofs_bvset_flip(iter);
else
*old_bvpage = NULL;
*bvec = iter->bvset->bvec[iter->cur++];
}
static void z_erofs_destroy_pcluster_pool(void)
{
int i;
......@@ -46,7 +123,7 @@ static int z_erofs_create_pcluster_pool(void)
for (pcs = pcluster_pool;
pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
size = struct_size(a, compressed_pages, pcs->maxpages);
size = struct_size(a, compressed_bvecs, pcs->maxpages);
sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
pcs->slab = kmem_cache_create(pcs->name, size, 0,
......@@ -150,30 +227,29 @@ int __init z_erofs_init_zip_subsystem(void)
return err;
}
enum z_erofs_collectmode {
COLLECT_SECONDARY,
COLLECT_PRIMARY,
enum z_erofs_pclustermode {
Z_EROFS_PCLUSTER_INFLIGHT,
/*
* The current collection was the tail of an exist chain, in addition
* that the previous processed chained collections are all decided to
* The current pclusters was the tail of an exist chain, in addition
* that the previous processed chained pclusters are all decided to
* be hooked up to it.
* A new chain will be created for the remaining collections which are
* not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
* the next collection cannot reuse the whole page safely in
* the following scenario:
* A new chain will be created for the remaining pclusters which are
* not processed yet, so different from Z_EROFS_PCLUSTER_FOLLOWED,
* the next pcluster cannot reuse the whole page safely for inplace I/O
* in the following scenario:
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
* | (belongs to the next cl) | (belongs to the current cl) |
* |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
* | (belongs to the next pcl) | (belongs to the current pcl) |
* |_______PCLUSTER_FOLLOWED______|________PCLUSTER_HOOKED__________|
*/
COLLECT_PRIMARY_HOOKED,
Z_EROFS_PCLUSTER_HOOKED,
/*
* a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it
* a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
* could be dispatched into bypass queue later due to uptodated managed
* pages. All related online pages cannot be reused for inplace I/O (or
* pagevec) since it can be directly decoded without I/O submission.
* bvpage) since it can be directly decoded without I/O submission.
*/
COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
* The current collection has been linked with the owned chain, and
* could also be linked with the remaining collections, which means
......@@ -184,39 +260,36 @@ enum z_erofs_collectmode {
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
* | (of the current cl) | (of the previous collection) |
* | PRIMARY_FOLLOWED or | |
* |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
* | PCLUSTER_FOLLOWED or | |
* |_____PCLUSTER_HOOKED__|___________PCLUSTER_FOLLOWED____________|
*
* [ (*) the above page can be used as inplace I/O. ]
*/
COLLECT_PRIMARY_FOLLOWED,
Z_EROFS_PCLUSTER_FOLLOWED,
};
struct z_erofs_decompress_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
struct z_erofs_bvec_iter biter;
struct z_erofs_pagevec_ctor vector;
struct page *candidate_bvpage;
struct z_erofs_pcluster *pcl, *tailpcl;
/* a pointer used to pick up inplace I/O pages */
struct page **icpage_ptr;
z_erofs_next_pcluster_t owned_head;
enum z_erofs_collectmode mode;
enum z_erofs_pclustermode mode;
bool readahead;
/* used for applying cache strategy on the fly */
bool backmost;
erofs_off_t headoffset;
/* a pointer used to pick up inplace I/O pages */
unsigned int icur;
};
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
.mode = COLLECT_PRIMARY_FOLLOWED, .backmost = true }
static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
static DEFINE_MUTEX(z_pagemap_global_lock);
.mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
enum z_erofs_cache_alloctype type,
......@@ -231,24 +304,21 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
*/
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
struct page **pages;
pgoff_t index;
unsigned int i;
if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
pages = pcl->compressed_pages;
index = pcl->obj.index;
for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) {
for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page;
compressed_page_t t;
struct page *newpage = NULL;
/* the compressed page was loaded before */
if (READ_ONCE(*pages))
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
page = find_get_page(mc, index);
page = find_get_page(mc, pcl->obj.index + i);
if (page) {
t = tag_compressed_page_justfound(page);
......@@ -269,7 +339,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
}
}
if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL,
tagptr_cast_ptr(t)))
continue;
if (page)
......@@ -283,7 +354,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
* managed cache since it can be moved to the bypass queue instead.
*/
if (standalone)
fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
/* called by erofs_shrinker to get rid of all compressed_pages */
......@@ -300,7 +371,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pcl->pclusterpages; ++i) {
struct page *page = pcl->compressed_pages[i];
struct page *page = pcl->compressed_bvecs[i].page;
if (!page)
continue;
......@@ -313,7 +384,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
continue;
/* barrier is implied in the following 'unlock_page' */
WRITE_ONCE(pcl->compressed_pages[i], NULL);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
detach_page_private(page);
unlock_page(page);
}
......@@ -323,56 +394,59 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
int erofs_try_to_free_cached_page(struct page *page)
{
struct z_erofs_pcluster *const pcl = (void *)page_private(page);
int ret = 0; /* 0 - busy */
int ret, i;
if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
unsigned int i;
if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
return 0;
ret = 0;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pcl->pclusterpages; ++i) {
if (pcl->compressed_pages[i] == page) {
WRITE_ONCE(pcl->compressed_pages[i], NULL);
if (pcl->compressed_bvecs[i].page == page) {
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
ret = 1;
break;
}
}
erofs_workgroup_unfreeze(&pcl->obj, 1);
if (ret)
detach_page_private(page);
}
return ret;
}
/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
struct page *page)
struct z_erofs_bvec *bvec)
{
struct z_erofs_pcluster *const pcl = fe->pcl;
while (fe->icpage_ptr > pcl->compressed_pages)
if (!cmpxchg(--fe->icpage_ptr, NULL, page))
while (fe->icur > 0) {
if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
NULL, bvec->page)) {
pcl->compressed_bvecs[fe->icur] = *bvec;
return true;
}
}
return false;
}
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct page *page, enum z_erofs_page_type type,
bool pvec_safereuse)
struct z_erofs_bvec *bvec, bool exclusive)
{
int ret;
/* give priority for inplaceio */
if (fe->mode >= COLLECT_PRIMARY &&
type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
z_erofs_try_inplace_io(fe, page))
if (exclusive) {
/* give priority for inplaceio to use file pages first */
if (z_erofs_try_inplace_io(fe, bvec))
return 0;
ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
pvec_safereuse);
fe->pcl->vcnt += (unsigned int)ret;
return ret ? 0 : -EAGAIN;
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
!fe->candidate_bvpage)
fe->candidate_bvpage = bvec->page;
}
ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage);
fe->pcl->vcnt += (ret >= 0);
return ret;
}
static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
......@@ -385,7 +459,7 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
*owned_head) == Z_EROFS_PCLUSTER_NIL) {
*owned_head = &pcl->next;
/* so we can attach this pcluster to our submission chain. */
f->mode = COLLECT_PRIMARY_FOLLOWED;
f->mode = Z_EROFS_PCLUSTER_FOLLOWED;
return;
}
......@@ -393,66 +467,21 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
* type 2, link to the end of an existing open chain, be careful
* that its submission is controlled by the original attached chain.
*/
if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
if (*owned_head != &pcl->next && pcl != f->tailpcl &&
cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
*owned_head) == Z_EROFS_PCLUSTER_TAIL) {
*owned_head = Z_EROFS_PCLUSTER_TAIL;
f->mode = COLLECT_PRIMARY_HOOKED;
f->mode = Z_EROFS_PCLUSTER_HOOKED;
f->tailpcl = NULL;
return;
}
/* type 3, it belongs to a chain, but it isn't the end of the chain */
f->mode = COLLECT_PRIMARY;
}
static int z_erofs_lookup_pcluster(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
{
struct z_erofs_pcluster *pcl = fe->pcl;
unsigned int length;
/* to avoid unexpected loop formed by corrupted images */
if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
if (pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
length = READ_ONCE(pcl->length);
if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
} else {
unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
if (map->m_flags & EROFS_MAP_FULL_MAPPED)
llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
while (llen > length &&
length != cmpxchg_relaxed(&pcl->length, length, llen)) {
cpu_relax();
length = READ_ONCE(pcl->length);
}
}
mutex_lock(&pcl->lock);
/* used to check tail merging loop due to corrupted images */
if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
fe->tailpcl = pcl;
z_erofs_try_to_claim_pcluster(fe);
return 0;
f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
}
static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct erofs_workgroup *grp;
......@@ -471,14 +500,13 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
atomic_set(&pcl->obj.refcount, 1);
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
(map->m_flags & EROFS_MAP_FULL_MAPPED ?
Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
pcl->length = 0;
pcl->partial = true;
/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = fe->owned_head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
fe->mode = COLLECT_PRIMARY_FOLLOWED;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
/*
* lock all primary followed works before visible to others
......@@ -494,7 +522,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
} else {
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
if (IS_ERR(grp)) {
err = PTR_ERR(grp);
goto err_out;
......@@ -520,11 +548,10 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
return err;
}
static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
struct inode *inode,
struct erofs_map_blocks *map)
static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
{
struct erofs_workgroup *grp;
struct erofs_map_blocks *map = &fe->map;
struct erofs_workgroup *grp = NULL;
int ret;
DBG_BUGON(fe->pcl);
......@@ -533,38 +560,35 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
if (map->m_flags & EROFS_MAP_META) {
if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
if (!(map->m_flags & EROFS_MAP_META)) {
grp = erofs_find_workgroup(fe->inode->i_sb,
map->m_pa >> PAGE_SHIFT);
} else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
goto tailpacking;
}
grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
if (grp) {
fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
ret = -EEXIST;
} else {
tailpacking:
ret = z_erofs_register_pcluster(fe, inode, map);
if (!ret)
goto out;
if (ret != -EEXIST)
return ret;
ret = z_erofs_register_pcluster(fe);
}
ret = z_erofs_lookup_pcluster(fe, inode, map);
if (ret) {
erofs_workgroup_put(&fe->pcl->obj);
if (ret == -EEXIST) {
mutex_lock(&fe->pcl->lock);
/* used to check tail merging loop due to corrupted images */
if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
fe->tailpcl = fe->pcl;
z_erofs_try_to_claim_pcluster(fe);
} else if (ret) {
return ret;
}
out:
z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
fe->pcl->pagevec, fe->pcl->vcnt);
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
/* since file-backed online pages are traversed in reverse order */
fe->icpage_ptr = fe->pcl->compressed_pages +
z_erofs_pclusterpages(fe->pcl);
fe->icur = z_erofs_pclusterpages(fe->pcl);
return 0;
}
......@@ -593,14 +617,19 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
if (!pcl)
return false;
z_erofs_pagevec_ctor_exit(&fe->vector, false);
z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
if (fe->candidate_bvpage) {
DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage));
fe->candidate_bvpage = NULL;
}
/*
* if all pending pages are added, don't hold its reference
* any longer if the pcluster isn't hosted by ourselves.
*/
if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
erofs_workgroup_put(&pcl->obj);
fe->pcl = NULL;
......@@ -628,11 +657,10 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
bool tight = true;
bool tight = true, exclusive;
enum z_erofs_cache_alloctype cache_strategy;
enum z_erofs_page_type page_type;
unsigned int cur, end, spiltted, index;
unsigned int cur, end, spiltted;
int err = 0;
/* register locked file pages as online pages in pack */
......@@ -653,7 +681,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
map->m_llen = 0;
err = z_erofs_map_blocks_iter(inode, map, 0);
if (err)
goto err_out;
goto out;
} else {
if (fe->pcl)
goto hitted;
......@@ -663,9 +691,9 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
if (!(map->m_flags & EROFS_MAP_MAPPED))
goto hitted;
err = z_erofs_collector_begin(fe, inode, map);
err = z_erofs_collector_begin(fe);
if (err)
goto err_out;
goto out;
if (z_erofs_is_inline_pcluster(fe->pcl)) {
void *mp;
......@@ -676,11 +704,12 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
err = PTR_ERR(mp);
erofs_err(inode->i_sb,
"failed to get inline page, err %d", err);
goto err_out;
goto out;
}
get_page(fe->map.buf.page);
WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
fe->map.buf.page);
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
} else {
/* bind cache first when cached decompression is preferred */
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
......@@ -696,10 +725,10 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
* Ensure the current partial page belongs to this submit chain rather
* than other concurrent submit chains or the noio(bypass) chain since
* those chains are handled asynchronously thus the page cannot be used
* for inplace I/O or pagevec (should be processed in strict order.)
* for inplace I/O or bvpage (should be processed in a strict order.)
*/
tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
tight &= (fe->mode >= Z_EROFS_PCLUSTER_HOOKED &&
fe->mode != Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
......@@ -707,60 +736,59 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
goto next_part;
}
/* let's derive page type */
page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
exclusive = (!cur && (!spiltted || tight));
if (cur)
tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
retry:
err = z_erofs_attach_page(fe, page, page_type,
fe->mode >= COLLECT_PRIMARY_FOLLOWED);
/* should allocate an additional short-lived page for pagevec */
if (err == -EAGAIN) {
struct page *const newpage =
alloc_page(GFP_NOFS | __GFP_NOFAIL);
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
err = z_erofs_attach_page(fe, newpage,
Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
if (!err)
err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
.page = page,
.offset = offset - map->m_la,
.end = end,
}), exclusive);
/* should allocate an additional short-lived page for bvset */
if (err == -EAGAIN && !fe->candidate_bvpage) {
fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL);
set_page_private(fe->candidate_bvpage,
Z_EROFS_SHORTLIVED_PAGE);
goto retry;
}
if (err)
goto err_out;
index = page->index - (map->m_la >> PAGE_SHIFT);
z_erofs_onlinepage_fixup(page, index, true);
if (err) {
DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage);
goto out;
}
z_erofs_onlinepage_split(page);
/* bump up the number of spiltted parts of a page */
++spiltted;
/* also update nr_pages */
fe->pcl->nr_pages = max_t(pgoff_t, fe->pcl->nr_pages, index + 1);
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
fe->pcl->length == map->m_llen)
fe->pcl->partial = false;
if (fe->pcl->length < offset + end - map->m_la) {
fe->pcl->length = offset + end - map->m_la;
fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
}
next_part:
/* can be used for verification */
/* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
end = cur;
if (end > 0)
goto repeat;
out:
if (err)
z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
__func__, page, spiltted, map->m_llen);
return err;
/* if some error occurred while processing this page */
err_out:
SetPageError(page);
goto out;
}
static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
......@@ -783,97 +811,137 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page->mapping && !z_erofs_is_shortlived_page(page);
}
static int z_erofs_decompress_pcluster(struct super_block *sb,
struct z_erofs_pcluster *pcl,
struct page **pagepool)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
struct z_erofs_pagevec_ctor ctor;
unsigned int i, inputsize, outputsize, llen, nr_pages;
struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
struct page **pages, **compressed_pages, *page;
struct z_erofs_decompress_backend {
struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
struct super_block *sb;
struct z_erofs_pcluster *pcl;
enum z_erofs_page_type page_type;
bool overlapped, partial;
int err;
/* pages with the longest decompressed length for deduplication */
struct page **decompressed_pages;
/* pages to keep the compressed data */
struct page **compressed_pages;
might_sleep();
DBG_BUGON(!READ_ONCE(pcl->nr_pages));
struct list_head decompressed_secondary_bvecs;
struct page **pagepool;
unsigned int onstack_used, nr_pages;
};
mutex_lock(&pcl->lock);
nr_pages = pcl->nr_pages;
struct z_erofs_bvec_item {
struct z_erofs_bvec bvec;
struct list_head list;
};
if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
pages = pages_onstack;
} else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
mutex_trylock(&z_pagemap_global_lock)) {
pages = z_pagemap_global;
} else {
gfp_t gfp_flags = GFP_KERNEL;
static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
gfp_flags |= __GFP_NOFAIL;
if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
unsigned int pgnr;
struct page *oldpage;
pages = kvmalloc_array(nr_pages, sizeof(struct page *),
gfp_flags);
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
oldpage = be->decompressed_pages[pgnr];
be->decompressed_pages[pgnr] = bvec->page;
/* fallback to global pagemap for the lowmem scenario */
if (!pages) {
mutex_lock(&z_pagemap_global_lock);
pages = z_pagemap_global;
}
if (!oldpage)
return;
}
for (i = 0; i < nr_pages; ++i)
pages[i] = NULL;
err = 0;
z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
pcl->pagevec, 0);
for (i = 0; i < pcl->vcnt; ++i) {
unsigned int pagenr;
/* (cold path) one pcluster is requested multiple times */
item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_NOFAIL);
item->bvec = *bvec;
list_add(&item->list, &be->decompressed_secondary_bvecs);
}
page = z_erofs_pagevec_dequeue(&ctor, &page_type);
static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
int err)
{
unsigned int off0 = be->pcl->pageofs_out;
struct list_head *p, *n;
list_for_each_safe(p, n, &be->decompressed_secondary_bvecs) {
struct z_erofs_bvec_item *bvi;
unsigned int end, cur;
void *dst, *src;
bvi = container_of(p, struct z_erofs_bvec_item, list);
cur = bvi->bvec.offset < 0 ? -bvi->bvec.offset : 0;
end = min_t(unsigned int, be->pcl->length - bvi->bvec.offset,
bvi->bvec.end);
dst = kmap_local_page(bvi->bvec.page);
while (cur < end) {
unsigned int pgnr, scur, len;
pgnr = (bvi->bvec.offset + cur + off0) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
scur = bvi->bvec.offset + cur -
((pgnr << PAGE_SHIFT) - off0);
len = min_t(unsigned int, end - cur, PAGE_SIZE - scur);
if (!be->decompressed_pages[pgnr]) {
err = -EFSCORRUPTED;
cur += len;
continue;
}
src = kmap_local_page(be->decompressed_pages[pgnr]);
memcpy(dst + cur, src + scur, len);
kunmap_local(src);
cur += len;
}
kunmap_local(dst);
if (err)
z_erofs_page_mark_eio(bvi->bvec.page);
z_erofs_onlinepage_endio(bvi->bvec.page);
list_del(p);
kfree(bvi);
}
}
/* all pages in pagevec ought to be valid */
DBG_BUGON(!page);
DBG_BUGON(z_erofs_page_is_invalidated(page));
static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
{
struct z_erofs_pcluster *pcl = be->pcl;
struct z_erofs_bvec_iter biter;
struct page *old_bvpage;
int i;
if (z_erofs_put_shortlivedpage(pagepool, page))
continue;
z_erofs_bvec_iter_begin(&biter, &pcl->bvset, Z_EROFS_INLINE_BVECS, 0);
for (i = 0; i < pcl->vcnt; ++i) {
struct z_erofs_bvec bvec;
if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
pagenr = 0;
else
pagenr = z_erofs_onlinepage_index(page);
z_erofs_bvec_dequeue(&biter, &bvec, &old_bvpage);
DBG_BUGON(pagenr >= nr_pages);
if (old_bvpage)
z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
/*
* currently EROFS doesn't support multiref(dedup),
* so here erroring out one multiref page.
*/
if (pages[pagenr]) {
DBG_BUGON(1);
SetPageError(pages[pagenr]);
z_erofs_onlinepage_endio(pages[pagenr]);
err = -EFSCORRUPTED;
}
pages[pagenr] = page;
DBG_BUGON(z_erofs_page_is_invalidated(bvec.page));
z_erofs_do_decompressed_bvec(be, &bvec);
}
z_erofs_pagevec_ctor_exit(&ctor, true);
overlapped = false;
compressed_pages = pcl->compressed_pages;
old_bvpage = z_erofs_bvec_iter_end(&biter);
if (old_bvpage)
z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
}
static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
bool *overlapped)
{
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
int i, err = 0;
*overlapped = false;
for (i = 0; i < pclusterpages; ++i) {
unsigned int pagenr;
struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
struct page *page = bvec->page;
page = compressed_pages[i];
/* all compressed pages ought to be valid */
DBG_BUGON(!page);
/* compressed pages ought to be present before decompressing */
if (!page) {
DBG_BUGON(1);
continue;
}
be->compressed_pages[i] = page;
if (z_erofs_is_inline_pcluster(pcl)) {
if (!PageUptodate(page))
......@@ -883,109 +951,129 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
if (erofs_page_is_managed(sbi, page)) {
if (erofs_page_is_managed(EROFS_SB(be->sb), page)) {
if (!PageUptodate(page))
err = -EIO;
continue;
}
/*
* only if non-head page can be selected
* for inplace decompression
*/
pagenr = z_erofs_onlinepage_index(page);
DBG_BUGON(pagenr >= nr_pages);
if (pages[pagenr]) {
DBG_BUGON(1);
SetPageError(pages[pagenr]);
z_erofs_onlinepage_endio(pages[pagenr]);
err = -EFSCORRUPTED;
z_erofs_do_decompressed_bvec(be, bvec);
*overlapped = true;
}
pages[pagenr] = page;
overlapped = true;
}
/* PG_error needs checking for all non-managed pages */
if (PageError(page)) {
DBG_BUGON(PageUptodate(page));
err = -EIO;
}
}
if (err)
return err;
return 0;
}
static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
int err)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
unsigned int i, inputsize;
int err2;
struct page *page;
bool overlapped;
mutex_lock(&pcl->lock);
be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
/* allocate (de)compressed page arrays if cannot be kept on stack */
be->decompressed_pages = NULL;
be->compressed_pages = NULL;
be->onstack_used = 0;
if (be->nr_pages <= Z_EROFS_ONSTACK_PAGES) {
be->decompressed_pages = be->onstack_pages;
be->onstack_used = be->nr_pages;
memset(be->decompressed_pages, 0,
sizeof(struct page *) * be->nr_pages);
}
if (pclusterpages + be->onstack_used <= Z_EROFS_ONSTACK_PAGES)
be->compressed_pages = be->onstack_pages + be->onstack_used;
if (!be->decompressed_pages)
be->decompressed_pages =
kvcalloc(be->nr_pages, sizeof(struct page *),
GFP_KERNEL | __GFP_NOFAIL);
if (!be->compressed_pages)
be->compressed_pages =
kvcalloc(pclusterpages, sizeof(struct page *),
GFP_KERNEL | __GFP_NOFAIL);
z_erofs_parse_out_bvecs(be);
err2 = z_erofs_parse_in_bvecs(be, &overlapped);
if (err2)
err = err2;
if (err)
goto out;
llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
if (nr_pages << PAGE_SHIFT >= pcl->pageofs_out + llen) {
outputsize = llen;
partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
} else {
outputsize = (nr_pages << PAGE_SHIFT) - pcl->pageofs_out;
partial = true;
}
if (z_erofs_is_inline_pcluster(pcl))
inputsize = pcl->tailpacking_size;
else
inputsize = pclusterpages * PAGE_SIZE;
err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
.sb = sb,
.in = compressed_pages,
.out = pages,
.sb = be->sb,
.in = be->compressed_pages,
.out = be->decompressed_pages,
.pageofs_in = pcl->pageofs_in,
.pageofs_out = pcl->pageofs_out,
.inputsize = inputsize,
.outputsize = outputsize,
.outputsize = pcl->length,
.alg = pcl->algorithmformat,
.inplace_io = overlapped,
.partial_decoding = partial
}, pagepool);
.partial_decoding = pcl->partial,
.fillgaps = pcl->multibases,
}, be->pagepool);
out:
/* must handle all compressed pages before actual file pages */
if (z_erofs_is_inline_pcluster(pcl)) {
page = compressed_pages[0];
WRITE_ONCE(compressed_pages[0], NULL);
page = pcl->compressed_bvecs[0].page;
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
put_page(page);
} else {
for (i = 0; i < pclusterpages; ++i) {
page = compressed_pages[i];
page = pcl->compressed_bvecs[i].page;
if (erofs_page_is_managed(sbi, page))
continue;
/* recycle all individual short-lived pages */
(void)z_erofs_put_shortlivedpage(pagepool, page);
WRITE_ONCE(compressed_pages[i], NULL);
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
}
}
if (be->compressed_pages < be->onstack_pages ||
be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
kvfree(be->compressed_pages);
z_erofs_fill_other_copies(be, err);
for (i = 0; i < nr_pages; ++i) {
page = pages[i];
for (i = 0; i < be->nr_pages; ++i) {
page = be->decompressed_pages[i];
if (!page)
continue;
DBG_BUGON(z_erofs_page_is_invalidated(page));
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(pagepool, page))
if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
if (err < 0)
SetPageError(page);
if (err)
z_erofs_page_mark_eio(page);
z_erofs_onlinepage_endio(page);
}
if (pages == z_pagemap_global)
mutex_unlock(&z_pagemap_global_lock);
else if (pages != pages_onstack)
kvfree(pages);
if (be->decompressed_pages != be->onstack_pages)
kvfree(be->decompressed_pages);
pcl->nr_pages = 0;
pcl->length = 0;
pcl->partial = true;
pcl->multibases = false;
pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
......@@ -997,22 +1085,25 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
struct z_erofs_decompress_backend be = {
.sb = io->sb,
.pagepool = pagepool,
.decompressed_secondary_bvecs =
LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
};
z_erofs_next_pcluster_t owned = io->head;
while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
struct z_erofs_pcluster *pcl;
/* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
/* impossible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
/* no possible that 'owned' equals NULL */
/* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
pcl = container_of(owned, struct z_erofs_pcluster, next);
owned = READ_ONCE(pcl->next);
be.pcl = container_of(owned, struct z_erofs_pcluster, next);
owned = READ_ONCE(be.pcl->next);
z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
erofs_workgroup_put(&pcl->obj);
z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
erofs_workgroup_put(&be.pcl->obj);
}
}
......@@ -1038,7 +1129,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (sync) {
if (!atomic_add_return(bios, &io->pending_bios))
complete(&io->u.done);
return;
}
......@@ -1071,7 +1161,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
int justfound;
repeat:
page = READ_ONCE(pcl->compressed_pages[nr]);
page = READ_ONCE(pcl->compressed_bvecs[nr].page);
oldpage = page;
if (!page)
......@@ -1087,7 +1177,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
* otherwise, it will go inplace I/O path instead.
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
WRITE_ONCE(pcl->compressed_pages[nr], page);
WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
set_page_private(page, 0);
tocache = true;
goto out_tocache;
......@@ -1113,14 +1203,13 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
/* the page is still in manage cache */
if (page->mapping == mc) {
WRITE_ONCE(pcl->compressed_pages[nr], page);
WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
ClearPageError(page);
if (!PagePrivate(page)) {
/*
* impossible to be !PagePrivate(page) for
* the current restriction as well if
* the page is already in compressed_pages[].
* the page is already in compressed_bvecs[].
*/
DBG_BUGON(!justfound);
......@@ -1149,7 +1238,8 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
put_page(page);
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
oldpage, page)) {
erofs_pagepool_add(pagepool, page);
cond_resched();
goto repeat;
......@@ -1186,6 +1276,7 @@ jobqueue_init(struct super_block *sb,
q = fgq;
init_completion(&fgq->u.done);
atomic_set(&fgq->pending_bios, 0);
q->eio = false;
}
q->sb = sb;
q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
......@@ -1246,26 +1337,25 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
DBG_BUGON(PageUptodate(page));
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (err)
SetPageError(page);
if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
if (!err)
SetPageUptodate(page);
unlock_page(page);
}
}
if (err)
q->eio = true;
z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
bio_put(bio);
}
static void z_erofs_submit_queue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct page **pagepool,
struct z_erofs_decompressqueue *fgq,
bool *force_fg)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
struct super_block *sb = f->inode->i_sb;
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
......@@ -1317,7 +1407,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
struct page *page;
page = pickup_page_for_submission(pcl, i++, pagepool,
MNGD_MAPPING(sbi));
mc);
if (!page)
continue;
......@@ -1369,15 +1459,14 @@ static void z_erofs_submit_queue(struct super_block *sb,
z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
}
static void z_erofs_runqueue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
struct page **pagepool, bool force_fg)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
z_erofs_submit_queue(f, pagepool, io, &force_fg);
/* handle bypass queue (no i/o pclusters) immediately */
z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
......@@ -1475,7 +1564,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
(void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, 0));
if (err)
......@@ -1524,7 +1613,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
(void)z_erofs_collector_end(&f);
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
z_erofs_runqueue(&f, &pagepool,
z_erofs_get_sync_decompress_policy(sbi, nr_pages));
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&pagepool);
......
......@@ -7,13 +7,10 @@
#define __EROFS_FS_ZDATA_H
#include "internal.h"
#include "zpvec.h"
#include "tagptr.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_NR_INLINE_PAGEVECS 3
#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001
#define Z_EROFS_PCLUSTER_LENGTH_BIT 1
#define Z_EROFS_INLINE_BVECS 2
/*
* let's leave a type here in case of introducing
......@@ -21,6 +18,21 @@
*/
typedef void *z_erofs_next_pcluster_t;
struct z_erofs_bvec {
struct page *page;
int offset;
unsigned int end;
};
#define __Z_EROFS_BVSET(name, total) \
struct name { \
/* point to the next page which contains the following bvecs */ \
struct page *nextpage; \
struct z_erofs_bvec bvec[total]; \
}
__Z_EROFS_BVSET(z_erofs_bvset,);
__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS);
/*
* Structure fields follow one of the following exclusion rules.
*
......@@ -38,24 +50,21 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
/* A: lower limit of decompressed length and if full length or not */
/* L: the maximum decompression size of this round */
unsigned int length;
/* L: total number of bvecs */
unsigned int vcnt;
/* I: page offset of start position of decompression */
unsigned short pageofs_out;
/* I: page offset of inline compressed data */
unsigned short pageofs_in;
/* L: maximum relative page index in pagevec[] */
unsigned short nr_pages;
/* L: total number of pages in pagevec[] */
unsigned int vcnt;
union {
/* L: inline a certain number of pagevecs for bootstrap */
erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
/* L: inline a certain number of bvec for bootstrap */
struct z_erofs_bvset_inline bvset;
/* I: can be used to free the pcluster by RCU. */
struct rcu_head rcu;
......@@ -72,8 +81,14 @@ struct z_erofs_pcluster {
/* I: compression algorithm format */
unsigned char algorithmformat;
/* A: compressed pages (can be cached or inplaced pages) */
struct page *compressed_pages[];
/* L: whether partial decompression or not */
bool partial;
/* L: indicate several pageofs_outs or not */
bool multibases;
/* A: compressed bvecs (can be cached or inplaced pages) */
struct z_erofs_bvec compressed_bvecs[];
};
/* let's avoid the valid 32-bit kernel addresses */
......@@ -94,6 +109,8 @@ struct z_erofs_decompressqueue {
struct completion done;
struct work_struct work;
} u;
bool eio;
};
static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
......@@ -108,38 +125,17 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
return pcl->pclusterpages;
}
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
/*
* waiters (aka. ongoing_packs): # to unlock the page
* sub-index: 0 - for partial page, >= 1 full page sub-index
* bit 31: I/O error occurred on this page
* bit 0 - 30: remaining parts to complete this page
*/
typedef atomic_t z_erofs_onlinepage_t;
/* type punning */
union z_erofs_onlinepage_converter {
z_erofs_onlinepage_t *o;
unsigned long *v;
};
static inline unsigned int z_erofs_onlinepage_index(struct page *page)
{
union z_erofs_onlinepage_converter u;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
}
#define Z_EROFS_PAGE_EIO (1 << 31)
static inline void z_erofs_onlinepage_init(struct page *page)
{
union {
z_erofs_onlinepage_t o;
atomic_t o;
unsigned long v;
/* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
......@@ -147,49 +143,36 @@ static inline void z_erofs_onlinepage_init(struct page *page)
SetPagePrivate(page);
}
static inline void z_erofs_onlinepage_fixup(struct page *page,
uintptr_t index, bool down)
static inline void z_erofs_onlinepage_split(struct page *page)
{
union z_erofs_onlinepage_converter u = { .v = &page_private(page) };
int orig, orig_index, val;
repeat:
orig = atomic_read(u.o);
orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
if (orig_index) {
if (!index)
return;
atomic_inc((atomic_t *)&page->private);
}
DBG_BUGON(orig_index != index);
}
static inline void z_erofs_page_mark_eio(struct page *page)
{
int orig;
val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
if (atomic_cmpxchg(u.o, orig, val) != orig)
goto repeat;
do {
orig = atomic_read((atomic_t *)&page->private);
} while (atomic_cmpxchg((atomic_t *)&page->private, orig,
orig | Z_EROFS_PAGE_EIO) != orig);
}
static inline void z_erofs_onlinepage_endio(struct page *page)
{
union z_erofs_onlinepage_converter u;
unsigned int v;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
v = atomic_dec_return((atomic_t *)&page->private);
if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
if (!(v & Z_EROFS_PAGE_EIO))
SetPageUptodate(page);
unlock_page(page);
}
erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
#define Z_EROFS_VMAP_ONSTACK_PAGES \
min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
#define Z_EROFS_ONSTACK_PAGES 32
#endif
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_ZPVEC_H
#define __EROFS_FS_ZPVEC_H
#include "tagptr.h"
/* page type in pagevec for decompress subsystem */
enum z_erofs_page_type {
/* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
Z_EROFS_PAGE_TYPE_EXCLUSIVE,
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
Z_EROFS_VLE_PAGE_TYPE_HEAD,
Z_EROFS_VLE_PAGE_TYPE_MAX
};
extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
__bad_page_type_exclusive(void);
/* pagevec tagged pointer */
typedef tagptr2_t erofs_vtptr_t;
/* pagevec collector */
struct z_erofs_pagevec_ctor {
struct page *curr, *next;
erofs_vtptr_t *pages;
unsigned int nr, index;
};
static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
bool atomic)
{
if (!ctor->curr)
return;
if (atomic)
kunmap_atomic(ctor->pages);
else
kunmap(ctor->curr);
}
static inline struct page *
z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
unsigned int nr)
{
unsigned int index;
/* keep away from occupied pages */
if (ctor->next)
return ctor->next;
for (index = 0; index < nr; ++index) {
const erofs_vtptr_t t = ctor->pages[index];
const unsigned int tags = tagptr_unfold_tags(t);
if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
return tagptr_unfold_ptr(t);
}
DBG_BUGON(nr >= ctor->nr);
return NULL;
}
static inline void
z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
bool atomic)
{
struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
z_erofs_pagevec_ctor_exit(ctor, atomic);
ctor->curr = next;
ctor->next = NULL;
ctor->pages = atomic ?
kmap_atomic(ctor->curr) : kmap(ctor->curr);
ctor->nr = PAGE_SIZE / sizeof(struct page *);
ctor->index = 0;
}
static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
unsigned int nr,
erofs_vtptr_t *pages,
unsigned int i)
{
ctor->nr = nr;
ctor->curr = ctor->next = NULL;
ctor->pages = pages;
if (i >= nr) {
i -= nr;
z_erofs_pagevec_ctor_pagedown(ctor, false);
while (i > ctor->nr) {
i -= ctor->nr;
z_erofs_pagevec_ctor_pagedown(ctor, false);
}
}
ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
ctor->index = i;
}
static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
struct page *page,
enum z_erofs_page_type type,
bool pvec_safereuse)
{
if (!ctor->next) {
/* some pages cannot be reused as pvec safely without I/O */
if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
ctor->index + 1 == ctor->nr)
return false;
}
if (ctor->index >= ctor->nr)
z_erofs_pagevec_ctor_pagedown(ctor, false);
/* exclusive page type must be 0 */
if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
__bad_page_type_exclusive();
/* should remind that collector->next never equal to 1, 2 */
if (type == (uintptr_t)ctor->next) {
ctor->next = page;
}
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
return true;
}
static inline struct page *
z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
enum z_erofs_page_type *type)
{
erofs_vtptr_t t;
if (ctor->index >= ctor->nr) {
DBG_BUGON(!ctor->next);
z_erofs_pagevec_ctor_pagedown(ctor, true);
}
t = ctor->pages[ctor->index];
*type = tagptr_unfold_tags(t);
/* should remind that collector->next never equal to 1, 2 */
if (*type == (uintptr_t)ctor->next)
ctor->next = tagptr_unfold_ptr(t);
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
return tagptr_unfold_ptr(t);
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment