Commit 3d0cca0b authored by Evgenii Stepanov, committed by Will Deacon

kasan: speed up mte_set_mem_tag_range

Use DC GVA / DC GZVA to speed up KASan memory tagging in HW tags mode.

The first cacheline is always tagged using STG/STZG even if the address is
cacheline-aligned, as benchmarks show it is faster than a conditional
branch.
Signed-off-by: Evgenii Stepanov <eugenis@google.com>
Co-developed-by: Peter Collingbourne <pcc@google.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210521010023.3244784-1-eugenis@google.com
Signed-off-by: Will Deacon <will@kernel.org>
parent c4681547
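
Background for the diff below: the DC GVA/GZVA block size is implementation defined and is read from DCZID_EL0, whose BS field (bits [3:0]) holds log2 of the block size in 4-byte words, so the size in bytes is 4 << BS. The following is a minimal, host-runnable sketch of that decode, not part of the commit; the BS value of 4 (a 64-byte block) is an assumption, real hardware reports its own value via read_cpuid(DCZID_EL0).

/*
 * Sketch only: decode a DC ZVA/GVA block size from an assumed DCZID_EL0.BS
 * value. BS = log2(block size in 4-byte words), so bytes = 4 << BS.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t dczid_el0 = 4;                         /* assumed BS field value */
        uint64_t dczid_bs = 4ull << (dczid_el0 & 0xf);  /* same decode as the patch */

        printf("DC GVA block size: %llu bytes\n",
               (unsigned long long)dczid_bs);           /* prints 64 */
        return 0;
}
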
@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
 	return mte_get_ptr_tag(addr);
 }
 
+static inline u64 __stg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
 /*
  * Assign allocation tags for a region of memory based on the pointer tag.
  * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ * size must be MTE_GRANULE_SIZE aligned.
  */
-static inline void mte_set_mem_tag_range(void *addr, size_t size,
-					  u8 tag, bool init)
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+					 bool init)
 {
-	u64 curr, end;
+	u64 curr, mask, dczid_bs, end1, end2, end3;
 
-	if (!size)
-		return;
+	/* Read DC G(Z)VA block size from the system register. */
+	dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
 
 	curr = (u64)__tag_set(addr, tag);
-	end = curr + size;
+	mask = dczid_bs - 1;
+	/* STG/STZG up to the end of the first block. */
+	end1 = curr | mask;
+	end3 = curr + size;
+	/* DC GVA / GZVA in [end1, end2) */
+	end2 = end3 & ~mask;
 
 	/*
-	 * 'asm volatile' is required to prevent the compiler to move
-	 * the statement outside of the loop.
+	 * The following code uses STG on the first DC GVA block even if the
+	 * start address is aligned - it appears to be faster than an alignment
+	 * check + conditional branch. Also, if the range size is at least 2 DC
+	 * GVA blocks, the first two loops can use post-condition to save one
+	 * branch each.
 	 */
-	if (init) {
-		do {
-			asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	} else {
-		do {
-			asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	}
+#define SET_MEMTAG_RANGE(stg_post, dc_gva)		\
+	do {						\
+		if (size >= 2 * dczid_bs) {		\
+			do {				\
+				curr = stg_post(curr);	\
+			} while (curr < end1);		\
+							\
+			do {				\
+				dc_gva(curr);		\
+				curr += dczid_bs;	\
+			} while (curr < end2);		\
+		}					\
+							\
+		while (curr < end3)			\
+			curr = stg_post(curr);		\
+	} while (0)
+
+	if (init)
+		SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+	else
+		SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
 }
 
 void mte_enable_kernel_sync(void);
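
For illustration, here is a minimal, host-runnable C sketch (not from the kernel tree) that mirrors the SET_MEMTAG_RANGE split above. It assumes a hypothetical 64-byte DC GVA block size and an example address/size, and prints which 16-byte granules would be tagged by STG/STZG and which blocks by DC GVA/GZVA. Note that the first block is covered by STG even though the example start address happens to be block-aligned, matching the commit message.

/*
 * Host-runnable sketch of the three-phase split done by SET_MEMTAG_RANGE,
 * assuming a 64-byte DC GVA block size. "stg" stands in for the 16-byte
 * post-indexed STG/STZG store, "dc gva" for the block-sized tag store.
 */
#include <stdio.h>
#include <stdint.h>

#define MTE_GRANULE_SIZE 16ull

int main(void)
{
        uint64_t dczid_bs = 64;        /* assumed DC GVA block size */
        uint64_t mask = dczid_bs - 1;
        uint64_t curr = 0x1040;        /* example start, granule aligned */
        uint64_t size = 256;           /* example range size */
        uint64_t end1 = curr | mask;   /* last byte of the first block */
        uint64_t end3 = curr + size;   /* end of the whole range */
        uint64_t end2 = end3 & ~mask;  /* end of the DC GVA region */

        if (size >= 2 * dczid_bs) {
                do {                   /* head: STG up to the first block boundary */
                        printf("stg    %#llx\n", (unsigned long long)curr);
                        curr += MTE_GRANULE_SIZE;
                } while (curr < end1);

                do {                   /* body: one DC GVA per whole block */
                        printf("dc gva %#llx\n", (unsigned long long)curr);
                        curr += dczid_bs;
                } while (curr < end2);
        }

        while (curr < end3) {          /* tail: STG for the remaining granules */
                printf("stg    %#llx\n", (unsigned long long)curr);
                curr += MTE_GRANULE_SIZE;
        }
        return 0;
}

Guarding the first two loops with size >= 2 * dczid_bs is what lets them use post-test do/while loops without overshooting the end of the range.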