Commit fbb871e2 authored by Max Filippov

xtensa: clean up word alignment macros in assembly code

Remove duplicate definitions of ALIGN/src_b/__src_b and SSA8/ssa8/__ssa8
from assembly sources and put a single definition into asm/asmmacro.h.
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
parent 0013aceb
@@ -158,4 +158,37 @@
 	.previous			\
 97:
+
+/*
+ * Extract unaligned word that is split between two registers w0 and w1
+ * into r regardless of machine endianness. SAR must be loaded with the
+ * starting bit of the word (see __ssa8).
+ */
+	.macro	__src_b	r, w0, w1
+#ifdef __XTENSA_EB__
+	src	\r, \w0, \w1
+#else
+	src	\r, \w1, \w0
+#endif
+	.endm
+
+/*
+ * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned
+ * word starting at r from two registers loaded from consecutive aligned
+ * addresses covering r regardless of machine endianness.
+ *
+ * r		0	1	2	3
+ * LE SAR	0	8	16	24
+ * BE SAR	32	24	16	8
+ */
+	.macro	__ssa8	r
+#ifdef __XTENSA_EB__
+	ssa8b	\r
+#else
+	ssa8l	\r
+#endif
+	.endm
+
 #endif /* _XTENSA_ASMMACRO_H */
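For illustration only (not part of the patch): a minimal sketch of how the two consolidated macros combine to read one 32-bit word from a possibly unaligned pointer. The register choices are assumptions: a3 holds the source address, a6 receives the result, a7/a8 are free scratch registers, and SAR is clobbered. As in the copy loops below, the second load may touch the word after the one containing a3.

	__ssa8	a3		# SAR := starting bit of the word at a3
	movi	a8, -4		# mask for rounding down to a word boundary
	and	a8, a3, a8	# a8 := a3 & ~3 (aligned address covering a3)
	l32i	a6, a8, 0	# aligned word containing the first bytes
	l32i	a7, a8, 4	# next aligned word containing the rest
	__src_b	a6, a6, a7	# a6 := unaligned word at a3, either endianness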
@@ -19,6 +19,7 @@
 #include <linux/linkage.h>
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
 #include <asm/processor.h>
 
 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
@@ -66,8 +67,6 @@
 #define INSN_T		24
 #define INSN_OP1	16
 
-.macro __src_b	r, w0, w1;	src	\r, \w0, \w1;	.endm
-.macro __ssa8	r;		ssa8b	\r;		.endm
 .macro __ssa8r	r;		ssa8l	\r;		.endm
 .macro __sh	r, s;		srl	\r, \s;		.endm
 .macro __sl	r, s;		sll	\r, \s;		.endm
@@ -81,8 +80,6 @@
 #define INSN_T		4
 #define INSN_OP1	12
 
-.macro __src_b	r, w0, w1;	src	\r, \w1, \w0;	.endm
-.macro __ssa8	r;		ssa8l	\r;		.endm
 .macro __ssa8r	r;		ssa8b	\r;		.endm
 .macro __sh	r, s;		sll	\r, \s;		.endm
 .macro __sl	r, s;		srl	\r, \s;		.endm
...
@@ -10,22 +10,7 @@
  */
 
 #include <variant/core.h>
+#include <asm/asmmacro.h>
 
-	.macro	src_b	r, w0, w1
-#ifdef __XTENSA_EB__
-	src	\r, \w0, \w1
-#else
-	src	\r, \w1, \w0
-#endif
-	.endm
-
-	.macro	ssa8	r
-#ifdef __XTENSA_EB__
-	ssa8b	\r
-#else
-	ssa8l	\r
-#endif
-	.endm
-
 /*
  * void *memcpy(void *dst, const void *src, size_t len);
@@ -209,7 +194,7 @@ memcpy:
 .Lsrcunaligned:
 	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
 	# copy 16 bytes per iteration for word-aligned dst and unaligned src
-	ssa8	a3		# set shift amount from byte offset
+	__ssa8	a3		# set shift amount from byte offset
 
 /* set to 1 when running on ISS (simulator) with the
    lint or ferret client, or 0 to save a few cycles */
@@ -229,16 +214,16 @@ memcpy:
 .Loop2:
 	l32i	a7, a3, 4
 	l32i	a8, a3, 8
-	src_b	a6, a6, a7
+	__src_b	a6, a6, a7
 	s32i	a6, a5, 0
 	l32i	a9, a3, 12
-	src_b	a7, a7, a8
+	__src_b	a7, a7, a8
 	s32i	a7, a5, 4
 	l32i	a6, a3, 16
-	src_b	a8, a8, a9
+	__src_b	a8, a8, a9
 	s32i	a8, a5, 8
 	addi	a3, a3, 16
-	src_b	a9, a9, a6
+	__src_b	a9, a9, a6
 	s32i	a9, a5, 12
 	addi	a5, a5, 16
 #if !XCHAL_HAVE_LOOPS
@@ -249,10 +234,10 @@ memcpy:
 	# copy 8 bytes
 	l32i	a7, a3, 4
 	l32i	a8, a3, 8
-	src_b	a6, a6, a7
+	__src_b	a6, a6, a7
 	s32i	a6, a5, 0
 	addi	a3, a3, 8
-	src_b	a7, a7, a8
+	__src_b	a7, a7, a8
 	s32i	a7, a5, 4
 	addi	a5, a5, 8
 	mov	a6, a8
@@ -261,7 +246,7 @@ memcpy:
 	# copy 4 bytes
 	l32i	a7, a3, 4
 	addi	a3, a3, 4
-	src_b	a6, a6, a7
+	__src_b	a6, a6, a7
 	s32i	a6, a5, 0
 	addi	a5, a5, 4
 	mov	a6, a7
@@ -485,7 +470,7 @@ memmove:
 .Lbacksrcunaligned:
 	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
 	# copy 16 bytes per iteration for word-aligned dst and unaligned src
-	ssa8	a3		# set shift amount from byte offset
+	__ssa8	a3		# set shift amount from byte offset
 #define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS with
 					 * the lint or ferret client, or 0
 					 * to save a few cycles */
@@ -506,15 +491,15 @@ memmove:
 	l32i	a7, a3, 12
 	l32i	a8, a3, 8
 	addi	a5, a5, -16
-	src_b	a6, a7, a6
+	__src_b	a6, a7, a6
 	s32i	a6, a5, 12
 	l32i	a9, a3, 4
-	src_b	a7, a8, a7
+	__src_b	a7, a8, a7
 	s32i	a7, a5, 8
 	l32i	a6, a3, 0
-	src_b	a8, a9, a8
+	__src_b	a8, a9, a8
 	s32i	a8, a5, 4
-	src_b	a9, a6, a9
+	__src_b	a9, a6, a9
 	s32i	a9, a5, 0
 #if !XCHAL_HAVE_LOOPS
 	bne	a3, a10, .backLoop2	# continue loop if a3:src != a10:src_start
@@ -526,9 +511,9 @@ memmove:
 	l32i	a7, a3, 4
 	l32i	a8, a3, 0
 	addi	a5, a5, -8
-	src_b	a6, a7, a6
+	__src_b	a6, a7, a6
 	s32i	a6, a5, 4
-	src_b	a7, a8, a7
+	__src_b	a7, a8, a7
 	s32i	a7, a5, 0
 	mov	a6, a8
 .Lback12:
@@ -537,7 +522,7 @@ memmove:
 	addi	a3, a3, -4
 	l32i	a7, a3, 0
 	addi	a5, a5, -4
-	src_b	a6, a7, a6
+	__src_b	a6, a7, a6
 	s32i	a6, a5, 0
 	mov	a6, a7
 .Lback13:
...
@@ -56,14 +56,6 @@
 #include <variant/core.h>
 #include <asm/asmmacro.h>
 
-#ifdef __XTENSA_EB__
-#define ALIGN(R, W0, W1) src	R, W0, W1
-#define SSA8(R)	ssa8b R
-#else
-#define ALIGN(R, W0, W1) src	R, W1, W0
-#define SSA8(R)	ssa8l R
-#endif
-
 	.text
 	.align	4
 	.global	__xtensa_copy_user
@@ -81,7 +73,7 @@ __xtensa_copy_user:
 					# per iteration
 	movi	a8, 3			# if source is also aligned,
 	bnone	a3, a8, .Laligned	# then use word copy
-	SSA8(	a3)			# set shift amount from byte offset
+	__ssa8	a3			# set shift amount from byte offset
 	bnez	a4, .Lsrcunaligned
 	movi	a2, 0			# return success for len==0
 	retw
@@ -220,16 +212,16 @@ EX(10f)	l32i	a6, a3, 0	# load first word
 .Loop2:
EX(10f)	l32i	a7, a3, 4
EX(10f)	l32i	a8, a3, 8
-	ALIGN(	a6, a6, a7)
+	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
EX(10f)	l32i	a9, a3, 12
-	ALIGN(	a7, a7, a8)
+	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5, 4
EX(10f)	l32i	a6, a3, 16
-	ALIGN(	a8, a8, a9)
+	__src_b	a8, a8, a9
EX(10f)	s32i	a8, a5, 8
 	addi	a3, a3, 16
-	ALIGN(	a9, a9, a6)
+	__src_b	a9, a9, a6
EX(10f)	s32i	a9, a5, 12
 	addi	a5, a5, 16
 #if !XCHAL_HAVE_LOOPS
@@ -240,10 +232,10 @@ EX(10f)	s32i	a9, a5, 12
 	# copy 8 bytes
EX(10f)	l32i	a7, a3, 4
EX(10f)	l32i	a8, a3, 8
-	ALIGN(	a6, a6, a7)
+	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
 	addi	a3, a3, 8
-	ALIGN(	a7, a7, a8)
+	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5, 4
 	addi	a5, a5, 8
 	mov	a6, a8
@@ -252,7 +244,7 @@ EX(10f)	s32i	a7, a5, 4
 	# copy 4 bytes
EX(10f)	l32i	a7, a3, 4
 	addi	a3, a3, 4
-	ALIGN(	a6, a6, a7)
+	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5, 0
 	addi	a5, a5, 4
 	mov	a6, a7
...