Commit 4d1e7918 authored by Eugeniy Paltsev's avatar Eugeniy Paltsev Committed by Vineet Gupta

ARCv2: lib: introduce memcpy optimized for unaligned access

Optimise code to use efficient unaligned memory access which is
available on ARCv2. This allows us to really simplify memcpy code
and speed up the code one and a half times (in case of unaligned
source or destination).

Don't wire it up yet !
Signed-off-by: default avatarEugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: default avatarVineet Gupta <vgupta@synopsys.com>
parent 5d4ab8d0
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* ARCv2 memcpy implementation optimized for unaligned memory access using.
*
* Copyright (C) 2019 Synopsys
* Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
*/
#include <linux/linkage.h>
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
# define ZOLSHFT 5
# define ZOLAND 0x1F
#else
# define LOADX(DST,RX) ld.ab DST, [RX, 4]
# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
# define ZOLSHFT 4
# define ZOLAND 0xF
#endif
ENTRY_CFI(memcpy)
mov r3, r0 ; don;t clobber ret val
lsr.f lp_count, r2, ZOLSHFT
lpnz @.Lcopy32_64bytes
;; LOOP START
LOADX (r6, r1)
LOADX (r8, r1)
LOADX (r10, r1)
LOADX (r4, r1)
STOREX (r6, r3)
STOREX (r8, r3)
STOREX (r10, r3)
STOREX (r4, r3)
.Lcopy32_64bytes:
and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
lpnz @.Lcopyremainingbytes
;; LOOP START
ldb.ab r5, [r1, 1]
stb.ab r5, [r3, 1]
.Lcopyremainingbytes:
j [blink]
END_CFI(memcpy)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment