Commit f441882a authored by Vincent Whitchurch, committed by Russell King

ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS

ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged
ldr/str instructions in copy_{from/to}_user.  They are currently
unnecessarily using single ldr/str instructions and can use ldm/stm
instructions instead like memcpy does (but with appropriate fixup
tables).

This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by
about 4% on my Cortex-A9.

before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s
before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s
before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s
before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s
before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s
before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s
before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s
before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s

 after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s
 after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s
 after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s
 after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s
 after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s
 after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s
 after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s
 after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s
 after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s
 after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
parent bc2eca9a
...@@ -243,13 +243,15 @@ ...@@ -243,13 +243,15 @@
.endm .endm
#endif #endif
#define USER(x...) \ #define USERL(l, x...) \
9999: x; \ 9999: x; \
.pushsection __ex_table,"a"; \ .pushsection __ex_table,"a"; \
.align 3; \ .align 3; \
.long 9999b,9001f; \ .long 9999b,l; \
.popsection .popsection
#define USER(x...) USERL(9001f, x)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
#define ALT_SMP(instr...) \ #define ALT_SMP(instr...) \
9998: instr 9998: instr
......
...@@ -34,12 +34,13 @@ ...@@ -34,12 +34,13 @@
* Number of bytes NOT copied. * Number of bytes NOT copied.
*/ */
#ifdef CONFIG_CPU_USE_DOMAINS
#ifndef CONFIG_THUMB2_KERNEL #ifndef CONFIG_THUMB2_KERNEL
#define LDR1W_SHIFT 0 #define LDR1W_SHIFT 0
#else #else
#define LDR1W_SHIFT 1 #define LDR1W_SHIFT 1
#endif #endif
#define STR1W_SHIFT 0
.macro ldr1w ptr reg abort .macro ldr1w ptr reg abort
ldrusr \reg, \ptr, 4, abort=\abort ldrusr \reg, \ptr, 4, abort=\abort
...@@ -57,10 +58,30 @@ ...@@ -57,10 +58,30 @@
ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
.endm .endm
#else
/*
 * !CONFIG_CPU_USE_DOMAINS variants of the load macros.  Each load is
 * wrapped in USERL(), which emits an __ex_table entry pairing the
 * instruction with the caller-supplied \abort label.
 */
#define LDR1W_SHIFT 0
/* Load one word from [\ptr] into \reg; \ptr is post-incremented by 4. */
.macro ldr1w ptr reg abort
USERL(\abort, W(ldr) \reg, [\ptr], #4)
.endm
/* Load four words with a single ldmia; \ptr is written back. */
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4})
.endm
/* Load eight words with a single ldmia; \ptr is written back. */
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
.endm
#endif /* CONFIG_CPU_USE_DOMAINS */
.macro ldr1b ptr reg cond=al abort .macro ldr1b ptr reg cond=al abort
ldrusr \reg, \ptr, 1, \cond, abort=\abort ldrusr \reg, \ptr, 1, \cond, abort=\abort
.endm .endm
#define STR1W_SHIFT 0
.macro str1w ptr reg abort .macro str1w ptr reg abort
W(str) \reg, [\ptr], #4 W(str) \reg, [\ptr], #4
.endm .endm
......
...@@ -35,11 +35,6 @@ ...@@ -35,11 +35,6 @@
*/ */
#define LDR1W_SHIFT 0 #define LDR1W_SHIFT 0
#ifndef CONFIG_THUMB2_KERNEL
#define STR1W_SHIFT 0
#else
#define STR1W_SHIFT 1
#endif
.macro ldr1w ptr reg abort .macro ldr1w ptr reg abort
W(ldr) \reg, [\ptr], #4 W(ldr) \reg, [\ptr], #4
...@@ -57,6 +52,14 @@ ...@@ -57,6 +52,14 @@
ldr\cond\()b \reg, [\ptr], #1 ldr\cond\()b \reg, [\ptr], #1
.endm .endm
#ifdef CONFIG_CPU_USE_DOMAINS
#ifndef CONFIG_THUMB2_KERNEL
#define STR1W_SHIFT 0
#else
#define STR1W_SHIFT 1
#endif
.macro str1w ptr reg abort .macro str1w ptr reg abort
strusr \reg, \ptr, 4, abort=\abort strusr \reg, \ptr, 4, abort=\abort
.endm .endm
...@@ -72,6 +75,20 @@ ...@@ -72,6 +75,20 @@
str1w \ptr, \reg8, \abort str1w \ptr, \reg8, \abort
.endm .endm
#else
/*
 * !CONFIG_CPU_USE_DOMAINS variants of the store macros.  Each store is
 * wrapped in USERL(), which emits an __ex_table entry pairing the
 * instruction with the caller-supplied \abort label.
 */
#define STR1W_SHIFT 0
/* Store \reg as one word to [\ptr]; \ptr is post-incremented by 4. */
.macro str1w ptr reg abort
USERL(\abort, W(str) \reg, [\ptr], #4)
.endm
/* Store eight words with a single stmia; \ptr is written back. */
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
.endm
#endif /* CONFIG_CPU_USE_DOMAINS */
.macro str1b ptr reg cond=al abort .macro str1b ptr reg cond=al abort
strusr \reg, \ptr, 1, \cond, abort=\abort strusr \reg, \ptr, 1, \cond, abort=\abort
.endm .endm
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment