Commit 2080ca1e authored by Gao Xiang

erofs: tidy up `struct z_erofs_bvec`

After revisiting the design, I believe `struct z_erofs_bvec` should
be page-based instead of folio-based, for the following reasons:

 - The minimum memory-mapping block is a page;

 - Under certain circumstances, only temporary pages need to be used
   instead of folios, since the refcount and mapcount of such pages
   are unnecessary;

 - Decompressors handle all types of pages, including temporary
   pages, not only folios.

When handling `struct z_erofs_bvec`, all folio-related information
is now accessed using the page_folio() helper.
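
For instance, here is a minimal sketch of the resulting access
pattern (illustrative only; the helper name z_erofs_bvec_folio()
below is made up for this example and is not part of the patch):

	/* illustrative only: derive the folio view from a page-based bvec */
	static struct folio *z_erofs_bvec_folio(struct z_erofs_bvec *bvec)
	{
		if (!bvec->page)		/* nothing cached in this slot yet */
			return NULL;
		return page_folio(bvec->page);	/* works for any kind of page */
	}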

The final goal of this round of adaptation is to eliminate direct
accesses to `struct page` in the EROFS codebase, apart from a few
exceptions such as `z_erofs_is_shortlived_page()` and
`z_erofs_page_is_invalidated()`, which require a new helper to
determine the memdesc type of an arbitrary page.
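
As a rough sketch of why those two remain page-based (paraphrased
from the existing helpers, not new code in this patch): they may be
handed a temporary page that carries no folio state, so they key off
the page itself:

	/* temporary (short-lived) pages are tagged via ->private */
	static inline bool z_erofs_is_shortlived_page(struct page *page)
	{
		return page->private == Z_EROFS_SHORTLIVED_PAGE;
	}

	static bool z_erofs_page_is_invalidated(struct page *page)
	{
		return !page_folio(page)->mapping &&
			!z_erofs_is_shortlived_page(page);
	}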

Large folios of compressed files actually seem to work now, but I'd
like to run more tests before officially enabling this for all
scenarios.
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240703120051.3653452-4-hsiangkao@linux.alibaba.com
parent 5b9654ef
@@ -19,10 +19,7 @@
 typedef void *z_erofs_next_pcluster_t;
 
 struct z_erofs_bvec {
-	union {
-		struct page *page;
-		struct folio *folio;
-	};
+	struct page *page;
 	int offset;
 	unsigned int end;
 };
@@ -617,32 +614,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
 		fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
 }
 
-/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+/* (erofs_shrinker) disconnect cached encoded data with pclusters */
 int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 					struct erofs_workgroup *grp)
 {
 	struct z_erofs_pcluster *const pcl =
 		container_of(grp, struct z_erofs_pcluster, obj);
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct folio *folio;
 	int i;
 
 	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-	/* There is no actice user since the pcluster is now freezed */
+	/* Each cached folio contains one page unless bs > ps is supported */
 	for (i = 0; i < pclusterpages; ++i) {
-		struct folio *folio = pcl->compressed_bvecs[i].folio;
+		if (pcl->compressed_bvecs[i].page) {
+			folio = page_folio(pcl->compressed_bvecs[i].page);
+			/* Avoid reclaiming or migrating this folio */
+			if (!folio_trylock(folio))
+				return -EBUSY;
 
-		if (!folio)
-			continue;
-
-		/* Avoid reclaiming or migrating this folio */
-		if (!folio_trylock(folio))
-			return -EBUSY;
-
-		if (!erofs_folio_is_managed(sbi, folio))
-			continue;
-		pcl->compressed_bvecs[i].folio = NULL;
-		folio_detach_private(folio);
-		folio_unlock(folio);
+			if (!erofs_folio_is_managed(sbi, folio))
+				continue;
+			pcl->compressed_bvecs[i].page = NULL;
+			folio_detach_private(folio);
+			folio_unlock(folio);
+		}
 	}
 	return 0;
 }
@@ -650,9 +646,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 {
 	struct z_erofs_pcluster *pcl = folio_get_private(folio);
-	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+	struct z_erofs_bvec *bvec = pcl->compressed_bvecs;
+	struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl);
 	bool ret;
-	int i;
 
 	if (!folio_test_private(folio))
 		return true;
@@ -661,9 +657,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	spin_lock(&pcl->obj.lockref.lock);
 	if (pcl->obj.lockref.count <= 0) {
 		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-		for (i = 0; i < pclusterpages; ++i) {
-			if (pcl->compressed_bvecs[i].folio == folio) {
-				pcl->compressed_bvecs[i].folio = NULL;
+		for (; bvec < end; ++bvec) {
+			if (bvec->page && page_folio(bvec->page) == folio) {
+				bvec->page = NULL;
 				folio_detach_private(folio);
 				ret = true;
 				break;
@@ -1062,7 +1058,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
 
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
-	return !page->mapping && !z_erofs_is_shortlived_page(page);
+	return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
 }
 
 struct z_erofs_decompress_backend {
@@ -1415,7 +1411,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	bool tocache = false;
 	struct z_erofs_bvec zbv;
 	struct address_space *mapping;
-	struct page *page;
+	struct folio *folio;
 	int bs = i_blocksize(f->inode);
 
 	/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1425,23 +1421,25 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	spin_lock(&pcl->obj.lockref.lock);
 	zbv = pcl->compressed_bvecs[nr];
 	spin_unlock(&pcl->obj.lockref.lock);
-	if (!zbv.folio)
+	if (!zbv.page)
 		goto out_allocfolio;
 
-	bvec->bv_page = &zbv.folio->page;
+	bvec->bv_page = zbv.page;
 	DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
+
+	folio = page_folio(zbv.page);
 	/*
 	 * Handle preallocated cached folios. We tried to allocate such folios
 	 * without triggering direct reclaim. If allocation failed, inplace
 	 * file-backed folios will be used instead.
 	 */
-	if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-		zbv.folio->private = 0;
+	if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+		folio->private = 0;
 		tocache = true;
 		goto out_tocache;
 	}
-	mapping = READ_ONCE(zbv.folio->mapping);
+	mapping = READ_ONCE(folio->mapping);
 	/*
 	 * File-backed folios for inplace I/Os are all locked steady,
 	 * therefore it is impossible for `mapping` to be NULL.
 	 */
@@ -1453,21 +1451,21 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 		return;
 	}
 
-	folio_lock(zbv.folio);
-	if (zbv.folio->mapping == mc) {
+	folio_lock(folio);
+	if (folio->mapping == mc) {
 		/*
 		 * The cached folio is still in managed cache but without
 		 * a valid `->private` pcluster hint. Let's reconnect them.
 		 */
-		if (!folio_test_private(zbv.folio)) {
-			folio_attach_private(zbv.folio, pcl);
+		if (!folio_test_private(folio)) {
+			folio_attach_private(folio, pcl);
 			/* compressed_bvecs[] already takes a ref before */
-			folio_put(zbv.folio);
+			folio_put(folio);
 		}
 
 		/* no need to submit if it is already up-to-date */
-		if (folio_test_uptodate(zbv.folio)) {
-			folio_unlock(zbv.folio);
+		if (folio_test_uptodate(folio)) {
+			folio_unlock(folio);
 			bvec->bv_page = NULL;
 		}
 		return;
@@ -1477,32 +1475,31 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	 * It has been truncated, so it's unsafe to reuse this one. Let's
 	 * allocate a new page for compressed data.
 	 */
-	DBG_BUGON(zbv.folio->mapping);
+	DBG_BUGON(folio->mapping);
 	tocache = true;
-	folio_unlock(zbv.folio);
-	folio_put(zbv.folio);
+	folio_unlock(folio);
+	folio_put(folio);
 out_allocfolio:
-	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
 	spin_lock(&pcl->obj.lockref.lock);
-	if (pcl->compressed_bvecs[nr].folio) {
-		erofs_pagepool_add(&f->pagepool, page);
+	if (pcl->compressed_bvecs[nr].page) {
+		erofs_pagepool_add(&f->pagepool, zbv.page);
 		spin_unlock(&pcl->obj.lockref.lock);
 		cond_resched();
 		goto repeat;
 	}
-	pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+	bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
+	folio = page_folio(zbv.page);
+	/* first mark it as a temporary shortlived folio (now 1 ref) */
+	folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 	spin_unlock(&pcl->obj.lockref.lock);
-	bvec->bv_page = page;
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
-		/* turn into a temporary shortlived folio (1 ref) */
-		zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
 		return;
-	}
-	folio_attach_private(zbv.folio, pcl);
+	folio_attach_private(folio, pcl);
 	/* drop a refcount added by allocpage (then 2 refs in total here) */
-	folio_put(zbv.folio);
+	folio_put(folio);
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,