Commit a112152f authored by Gao Xiang, committed by Greg Kroah-Hartman

staging: erofs: fix mis-acted TAIL merging behavior

EROFS has an optimized path called TAIL merging, which is designed
to merge multiple reads and their corresponding decompressions into
one if the requests read contiguous pages at almost the same time.

In general, it behaves as follows:
 ________________________________________________________________
  ... |  TAIL  .  HEAD  |  PAGE  |  PAGE  |  TAIL    . HEAD | ...
 _____|_combined page A_|________|________|_combined page B_|____
        1  ]  ->  [  2                          ]  ->  [ 3
If the above three reads are requested in the order 1-2-3, a single
large work chain is generated rather than three individual work
chains, which reduces scheduling overhead and boosts sequential reads.

However, if Read 2 is processed slightly earlier than Read 1, the
current code still generates two individual work chains (chains 1
and 2), yet it performs in-place decompression for combined page A.
If chain 2 then decompresses ahead of chain 1, the two chains race
and the decompressed page can be corrupted. This patch fixes it.

Fixes: 3883a79a ("staging: erofs: introduce VLE decompression support")
Cc: <stable@vger.kernel.org> # 4.19+
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 1e5ceeab
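
Before reading the diff, it may help to have a compact user-space model of
the claim decision this patch introduces. This is a sketch only: the enum
values and the three outcomes mirror the kernel's names, but the chain
representation (a bare pointer with two sentinels) and the lack of any
atomicity are simplifications, not the kernel code.

enum claim_result {
	CLAIM_NONE,	/* Z_EROFS_VLE_WORK_PRIMARY: chain already closed */
	CLAIM_HOOKED,	/* ..._PRIMARY_HOOKED: hooked onto an open chain;
			 * the shared tail page is not under our control */
	CLAIM_FOLLOWED,	/* ..._PRIMARY_FOLLOWED: the whole tail page may
			 * be reused for in-place decompression */
};

#define CHAIN_NIL	((void *)0)	/* workgroup not on any chain yet */
#define CHAIN_TAIL	((void *)1)	/* open end of an existing chain */

static enum claim_result try_to_claim(void **grp_next, void **owned_head)
{
	if (*grp_next == CHAIN_NIL) {		/* type 1: extend our chain */
		*grp_next = *owned_head;
		*owned_head = (void *)grp_next;
		return CLAIM_FOLLOWED;
	}
	if (*grp_next == CHAIN_TAIL) {		/* type 2: hook onto a tail */
		*grp_next = *owned_head;
		*owned_head = CHAIN_TAIL;
		/* before this patch, both claimed cases reported the same
		 * "true", so a hooked work was treated as FOLLOWED and its
		 * shared tail page was decompressed in place -- the race */
		return CLAIM_HOOKED;
	}
	return CLAIM_NONE;			/* chain already closed */
}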
@@ -107,15 +107,30 @@ enum z_erofs_vle_work_role {
 	Z_EROFS_VLE_WORK_SECONDARY,
 	Z_EROFS_VLE_WORK_PRIMARY,
 	/*
-	 * The current work has at least been linked with the following
-	 * processed chained works, which means if the processing page
-	 * is the tail partial page of the work, the current work can
-	 * safely use the whole page, as illustrated below:
-	 * +--------------+-------------------------------------------+
-	 * |  tail page   |      head page (of the previous work)     |
-	 * +--------------+-------------------------------------------+
-	 *   /\  which belongs to the current work
-	 * [  (*) this page can be used for the current work itself.  ]
+	 * The current work was the tail of an exist chain, and the previous
+	 * processed chained works are all decided to be hooked up to it.
+	 * A new chain should be created for the remaining unprocessed works,
+	 * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
+	 * the next work cannot reuse the whole page in the following scenario:
+	 *  ________________________________________________________________
+	 * |      tail (partial) page     |       head (partial) page       |
+	 * |  (belongs to the next work)  |  (belongs to the current work)  |
+	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+	 */
+	Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
+	/*
+	 * The current work has been linked with the processed chained works,
+	 * and could be also linked with the potential remaining works, which
+	 * means if the processing page is the tail partial page of the work,
+	 * the current work can safely use the whole page (since the next work
+	 * is under control) for in-place decompression, as illustrated below:
+	 *  ________________________________________________________________
+	 * |  tail (partial) page  |          head (partial) page           |
+	 * | (of the current work) |        (of the previous work)          |
+	 * |  PRIMARY_FOLLOWED or  |                                        |
+	 * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________|
+	 *
+	 * [  (*) the above page can be used for the current work itself.  ]
 	 */
 	Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
 	Z_EROFS_VLE_WORK_MAX
@@ -309,10 +324,10 @@ static int z_erofs_vle_work_add_page(
 	return ret ? 0 : -EAGAIN;
 }
 
-static inline bool try_to_claim_workgroup(
-	struct z_erofs_vle_workgroup *grp,
-	z_erofs_vle_owned_workgrp_t *owned_head,
-	bool *hosted)
+static enum z_erofs_vle_work_role
+try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp,
+		       z_erofs_vle_owned_workgrp_t *owned_head,
+		       bool *hosted)
 {
 	DBG_BUGON(*hosted == true);
@@ -326,6 +341,9 @@ static inline bool try_to_claim_workgroup(
 		*owned_head = &grp->next;
 		*hosted = true;
+		/* lucky, I am the followee :) */
+		return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+
 	} else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
 		/*
 		 * type 2, link to the end of a existing open chain,
@@ -335,12 +353,11 @@ static inline bool try_to_claim_workgroup(
 		if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
 			    *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL)
 			goto retry;
-
 		*owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
-	} else
-		return false;	/* :( better luck next time */
+		return Z_EROFS_VLE_WORK_PRIMARY_HOOKED;
+	}
 
-	return true;	/* lucky, I am the followee :) */
+	return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */
 }
 
 struct z_erofs_vle_work_finder {
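
The `goto retry` above relies on cmpxchg to serialize concurrent claimers.
As a rough user-space analogue with C11 atomics (illustrative names and
sentinel, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>

#define CHAIN_TAIL ((void *)1)	/* sentinel: open end of a chain */

struct workgrp {
	_Atomic(void *) next;
};

/* Hook @owned_head onto the open tail of @grp.  Exactly one concurrent
 * caller can swing TAIL -> owned_head; a loser sees the updated pointer
 * and must re-evaluate, as the kernel code does with "goto retry". */
static bool hook_to_open_chain(struct workgrp *grp, void *owned_head)
{
	void *expected = CHAIN_TAIL;

	return atomic_compare_exchange_strong(&grp->next, &expected,
					      owned_head);
}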
@@ -418,12 +435,9 @@ z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f)
 	*f->hosted = false;
 	if (!primary)
 		*f->role = Z_EROFS_VLE_WORK_SECONDARY;
-	/* claim the workgroup if possible */
-	else if (try_to_claim_workgroup(grp, f->owned_head, f->hosted))
-		*f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
-	else
-		*f->role = Z_EROFS_VLE_WORK_PRIMARY;
+	else	/* claim the workgroup if possible */
+		*f->role = try_to_claim_workgroup(grp, f->owned_head,
+						  f->hosted);
 	return work;
 }
@@ -487,6 +501,9 @@ z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
 	return work;
 }
 
+#define builder_is_hooked(builder) \
+	((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED)
+
 #define builder_is_followed(builder) \
 	((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
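
Both macros depend only on the declaration order of the roles (SECONDARY <
PRIMARY < PRIMARY_HOOKED < PRIMARY_FOLLOWED), so builder_is_hooked() also
holds for a followed work while the reverse is false. A stand-alone check
of that threshold trick (simplified names, not the kernel types):

#include <assert.h>

enum role { SECONDARY, PRIMARY, PRIMARY_HOOKED, PRIMARY_FOLLOWED };
struct builder { enum role role; };

#define builder_is_hooked(b)	((b)->role >= PRIMARY_HOOKED)
#define builder_is_followed(b)	((b)->role >= PRIMARY_FOLLOWED)

int main(void)
{
	struct builder hooked = { PRIMARY_HOOKED };
	struct builder followed = { PRIMARY_FOLLOWED };

	/* a hooked work is not followed, but a followed work is hooked */
	assert(builder_is_hooked(&hooked) && !builder_is_followed(&hooked));
	assert(builder_is_hooked(&followed) && builder_is_followed(&followed));
	return 0;
}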
@@ -680,7 +697,7 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
 	struct z_erofs_vle_work_builder *const builder = &fe->builder;
 	const loff_t offset = page_offset(page);
 
-	bool tight = builder_is_followed(builder);
+	bool tight = builder_is_hooked(builder);
 	struct z_erofs_vle_work *work = builder->work;
 
 	enum z_erofs_cache_alloctype cache_strategy;
@@ -739,7 +756,7 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
 			     map->m_plen / PAGE_SIZE,
 			     cache_strategy, page_pool, GFP_KERNEL);
 
-	tight &= builder_is_followed(builder);
+	tight &= builder_is_hooked(builder);
 	work = builder->work;
 hitted:
 	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
@@ -754,6 +771,9 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
 			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
 				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
 
+	if (cur)
+		tight &= builder_is_followed(builder);
+
 retry:
 	err = z_erofs_vle_work_add_page(builder, page, page_type);
 	/* should allocate an additional staging page for pagevec */
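
Taken together, the three `tight` changes mean a page can still be marked
EXCLUSIVE (eligible for in-place decompression) while the work is merely
hooked, but only until a partial span of the page has been emitted; after
that, a fully FOLLOWED work is required. A toy model of that rule follows
(user-space names and a standalone function, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

enum role { ROLE_PRIMARY, ROLE_HOOKED, ROLE_FOLLOWED };

/* @partial_seen: a partial (cur != 0) span of this page was already
 * emitted, i.e. part of the page belongs to a neighbouring work. */
static const char *span_type(enum role role, bool partial_seen)
{
	bool tight = role >= ROLE_HOOKED;	/* builder_is_hooked()   */

	if (partial_seen)
		tight &= role >= ROLE_FOLLOWED;	/* builder_is_followed() */

	return tight ? "EXCLUSIVE" : "TAIL_SHARED";
}

int main(void)
{
	/* the case the old code got wrong: a merely hooked work kept the
	 * shared part of the page exclusive and decompressed in place */
	printf("hooked,   full page   : %s\n", span_type(ROLE_HOOKED, false));
	printf("hooked,   after split : %s\n", span_type(ROLE_HOOKED, true));
	printf("followed, after split : %s\n", span_type(ROLE_FOLLOWED, true));
	return 0;
}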
......