Commit f2db633d authored by Michael Cree's avatar Michael Cree Committed by Linus Torvalds

alpha: Use new generic strncpy_from_user() and strnlen_user()

Similar to x86/sparc/powerpc implementations except:
1) we implement an extremely efficient has_zero()/find_zero()
   sequence with both prep_zero_mask() and create_zero_mask()
   no-operations.
2) Our output from prep_zero_mask() differs in that only the
   lowest eight bits are used to represent the zero bytes
   nevertheless it can be safely ORed with other similar masks
   from prep_zero_mask() and forms input to create_zero_mask(),
   the two fundamental properties prep_zero_mask() must satisfy.

Tests on EV67 and EV68 CPUs revealed that the generic code is
essentially as fast (to within 0.5% of CPU cycles) of the old
Alpha specific code for large quadword-aligned strings, despite
the 30% extra CPU instructions executed.  In contrast, the
generic code for unaligned strings is substantially slower (by
more than a factor of 3) than the old Alpha specific code.
Signed-off-by: default avatarMichael Cree <mcree@orcon.net.nz>
Acked-by: default avatarMatt Turner <mattst88@gmail.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent d8d5da12
......@@ -18,6 +18,8 @@ config ALPHA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
......
......@@ -433,36 +433,12 @@ clear_user(void __user *to, long len)
#undef __module_address
#undef __module_call
/* Returns: -EFAULT if exception before terminator, N if the entire
buffer filled, else strlen. */
#define user_addr_max() \
(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
extern long __strncpy_from_user(char *__to, const char __user *__from, long __to_len);
extern inline long
strncpy_from_user(char *to, const char __user *from, long n)
{
long ret = -EFAULT;
if (__access_ok((unsigned long)from, 0, get_fs()))
ret = __strncpy_from_user(to, from, n);
return ret;
}
/* Returns: 0 if bad, string length+1 (memory size) of string if ok */
extern long __strlen_user(const char __user *);
extern inline long strlen_user(const char __user *str)
{
return access_ok(VERIFY_READ,str,0) ? __strlen_user(str) : 0;
}
/* Returns: 0 if exception before NUL or reaching the supplied limit (N),
* a value greater than N if the limit would be exceeded, else strlen. */
extern long __strnlen_user(const char __user *, long);
extern inline long strnlen_user(const char __user *str, long n)
{
return access_ok(VERIFY_READ,str,0) ? __strnlen_user(str, n) : 0;
}
extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strlen_user(const char __user *str);
extern __must_check long strnlen_user(const char __user *str, long n);
/*
* About the exception table:
......
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
#include <asm/compiler.h>
/*
* word-at-a-time interface for Alpha.
*/
/*
* We do not use the word_at_a_time struct on Alpha, but it needs to be
* implemented to humour the generic code.
*/
struct word_at_a_time {
const unsigned long unused;
};
#define WORD_AT_A_TIME_CONSTANTS { 0 }
/* Return nonzero if val has a zero */
static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c)
{
unsigned long zero_locations = __kernel_cmpbge(0, val);
*bits = zero_locations;
return zero_locations;
}
static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c)
{
return bits;
}
#define create_zero_mask(bits) (bits)
static inline unsigned long find_zero(unsigned long bits)
{
#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
/* Simple if have CIX instructions */
return __kernel_cttz(bits);
#else
unsigned long t1, t2, t3;
/* Retain lowest set bit only */
bits &= -bits;
/* Binary search for lowest set bit */
t1 = bits & 0xf0;
t2 = bits & 0xcc;
t3 = bits & 0xaa;
if (t1) t1 = 4;
if (t2) t2 = 2;
if (t3) t3 = 1;
return t1 + t2 + t3;
#endif
}
#endif /* _ASM_WORD_AT_A_TIME_H */
......@@ -74,8 +74,6 @@ EXPORT_SYMBOL(alpha_fp_emul);
*/
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
/*
* SMP-specific symbols.
......
......@@ -31,8 +31,6 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \
$(ev6-y)memchr.o \
$(ev6-y)copy_user.o \
$(ev6-y)clear_user.o \
$(ev6-y)strncpy_from_user.o \
$(ev67-y)strlen_user.o \
$(ev6-y)csum_ipv6_magic.o \
$(ev6-y)clear_page.o \
$(ev6-y)copy_page.o \
......
This diff is collapsed.
/*
* arch/alpha/lib/ev67-strlen_user.S
* 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com>
*
* Return the length of the string including the NULL terminator
* (strlen+1) or zero if an error occurred.
*
* In places where it is critical to limit the processing time,
* and the data is not trusted, strnlen_user() should be used.
* It will return a value greater than its second argument if
* that limit would be exceeded. This implementation is allowed
* to access memory beyond the limit, but will not cross a page
* boundary when doing so.
*
* Much of the information about 21264 scheduling/coding comes from:
* Compiler Writer's Guide for the Alpha 21264
* abbreviated as 'CWG' in other comments here
* ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
* Scheduling notation:
* E - either cluster
* U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
* Try not to change the actual algorithm if possible for consistency.
*/
#include <asm/regdef.h>
/* Allow an exception for an insn; exit if we get one. */
#define EX(x,y...) \
99: x,##y; \
.section __ex_table,"a"; \
.long 99b - .; \
lda v0, $exception-99b(zero); \
.previous
.set noreorder
.set noat
.text
.globl __strlen_user
.ent __strlen_user
.frame sp, 0, ra
.align 4
__strlen_user:
ldah a1, 32767(zero) # do not use plain strlen_user() for strings
# that might be almost 2 GB long; you should
# be using strnlen_user() instead
nop
nop
nop
.globl __strnlen_user
.align 4
__strnlen_user:
.prologue 0
EX( ldq_u t0, 0(a0) ) # L : load first quadword (a0 may be misaligned)
lda t1, -1(zero) # E :
insqh t1, a0, t1 # U :
andnot a0, 7, v0 # E :
or t1, t0, t0 # E :
subq a0, 1, a0 # E : get our +1 for the return
cmpbge zero, t0, t1 # E : t1 <- bitmask: bit i == 1 <==> i-th byte == 0
subq a1, 7, t2 # E :
subq a0, v0, t0 # E :
bne t1, $found # U :
addq t2, t0, t2 # E :
addq a1, 1, a1 # E :
nop # E :
nop # E :
.align 4
$loop: ble t2, $limit # U :
EX( ldq t0, 8(v0) ) # L :
nop # E :
nop # E :
cmpbge zero, t0, t1 # E :
subq t2, 8, t2 # E :
addq v0, 8, v0 # E : addr += 8
beq t1, $loop # U :
$found: cttz t1, t2 # U0 :
addq v0, t2, v0 # E :
subq v0, a0, v0 # E :
ret # L0 :
$exception:
nop
nop
nop
ret
.align 4 # currently redundant
$limit:
nop
nop
subq a1, t2, v0
ret
.end __strlen_user
/*
* arch/alpha/lib/strlen_user.S
*
* Return the length of the string including the NUL terminator
* (strlen+1) or zero if an error occurred.
*
* In places where it is critical to limit the processing time,
* and the data is not trusted, strnlen_user() should be used.
* It will return a value greater than its second argument if
* that limit would be exceeded. This implementation is allowed
* to access memory beyond the limit, but will not cross a page
* boundary when doing so.
*/
#include <asm/regdef.h>
/* Allow an exception for an insn; exit if we get one. */
#define EX(x,y...) \
99: x,##y; \
.section __ex_table,"a"; \
.long 99b - .; \
lda v0, $exception-99b(zero); \
.previous
.set noreorder
.set noat
.text
.globl __strlen_user
.ent __strlen_user
.frame sp, 0, ra
.align 3
__strlen_user:
ldah a1, 32767(zero) # do not use plain strlen_user() for strings
# that might be almost 2 GB long; you should
# be using strnlen_user() instead
.globl __strnlen_user
.align 3
__strnlen_user:
.prologue 0
EX( ldq_u t0, 0(a0) ) # load first quadword (a0 may be misaligned)
lda t1, -1(zero)
insqh t1, a0, t1
andnot a0, 7, v0
or t1, t0, t0
subq a0, 1, a0 # get our +1 for the return
cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0
subq a1, 7, t2
subq a0, v0, t0
bne t1, $found
addq t2, t0, t2
addq a1, 1, a1
.align 3
$loop: ble t2, $limit
EX( ldq t0, 8(v0) )
subq t2, 8, t2
addq v0, 8, v0 # addr += 8
cmpbge zero, t0, t1
beq t1, $loop
$found: negq t1, t2 # clear all but least set bit
and t1, t2, t1
and t1, 0xf0, t2 # binary search for that set bit
and t1, 0xcc, t3
and t1, 0xaa, t4
cmovne t2, 4, t2
cmovne t3, 2, t3
cmovne t4, 1, t4
addq t2, t3, t2
addq v0, t4, v0
addq v0, t2, v0
nop # dual issue next two on ev4 and ev5
subq v0, a0, v0
$exception:
ret
.align 3 # currently redundant
$limit:
subq a1, t2, v0
ret
.end __strlen_user
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment