Commit 3eb64c82 authored by Nicolas Pitre's avatar Nicolas Pitre Committed by Russell King

[ARM PATCH] 1678/1: correct and better do_div() implementation for ARM

Patch from Nicolas Pitre

Here's a rewrite of the ARM do_div() implementation.  It is much
faster and smarter than the current code, and it also takes
advantage of ARMv5+ instructions when target processor allows it.

The current code also deserves to be killed ASAP since it overflows
and fails to compute correct values in many cases.  For example:

	u64 n = 2200000001;
	u32 x = 2200000000;
	u32 r = do_div(n, x);

This currently returns n = 41 and r = 46829569 which is obviously bad.

Another failing example is n=15000000000000000000 and x=3000000000.
parent 24d4f462
......@@ -68,8 +68,8 @@ extern void __umoddi3(void);
extern void __udivmoddi4(void);
extern void __udivsi3(void);
extern void __umodsi3(void);
extern void __do_div64(void);
extern void abort(void);
extern void do_div64(void);
extern void ret_from_exception(void);
extern void fpundefinstr(void);
......@@ -223,7 +223,7 @@ EXPORT_SYMBOL_NOVERS(__umoddi3);
EXPORT_SYMBOL_NOVERS(__udivmoddi4);
EXPORT_SYMBOL_NOVERS(__udivsi3);
EXPORT_SYMBOL_NOVERS(__umodsi3);
EXPORT_SYMBOL_NOVERS(do_div64);
EXPORT_SYMBOL_NOVERS(__do_div64);
/* bitops */
EXPORT_SYMBOL(_set_bit_le);
......
/*
* linux/arch/arm/lib/div64.S
*
* Optimized computation of 64-bit dividend / 32-bit divisor
*
* Author: Nicolas Pitre
* Created: Oct 5, 2003
* Copyright: Monta Vista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#ifndef __ARMEB__
ql .req r0 @ quotient low
qh .req r1 @ quotient high
onl .req r0 @ original dividend low
onh .req r1 @ original dividend high
nl .req r4 @ dividend low
nh .req r5 @ dividend high
res .req r4 @ result
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
ql .req r1
qh .req r0
onl .req r1
onh .req r0
nl .req r5
nh .req r4
res .req r5
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
dl .req r3 @ divisor low
dh .req r2 @ divsor high
ENTRY(do_div64)
stmfd sp!, {r4, r5, lr}
mov nl, onl
movs nh, onh @ if high bits are zero
movne lr, #33
moveq lr, #1 @ only divide low bits
moveq nh, onl
1: cmp nh, dh
bls 2f
add lr, lr, #1
movs dh, dh, lsl #1 @ left justify disor
bpl 1b
2: movs nh, onh
moveq dl, dh
moveq dh, #0
movne dl, #0
mov ql, #0
mov qh, #0
3: subs ip, nl, dl @ trial subtraction
sbcs ip, nh, dh
movcs nh, ip @ only update if successful
subcs nl, nl, dl @ (repeat the subtraction)
adcs ql, ql, ql @ C=1 if successful, shift into
adc qh, qh, qh @ quotient
movs dh, dh, lsr #1 @ shift base high part right
mov dl, dl, rrx @ shift base low part right
subs lr, lr, #1
bne 3b
mov r2, res
ldmfd sp!, {r4, r5, pc}
/*
* __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
*
* Note: Calling convention is totally non standard for optimal code.
* This is meant to be used by do_div() from include/asm/div64.h only.
*
* Input parameters:
* xh-xl = dividend (clobbered)
* r4 = divisor (preserved)
*
* Output values:
* yh-yl = result
* xh = remainder
*
* Clobbered regs: xl, ip
*/
ENTRY(__do_div64)
@ Test for easy paths first.
subs ip, r4, #1
bls 9f @ divisor is 0 or 1
tst ip, r4
beq 8f @ divisor is power of 2
@ See if we need to handle upper 32-bit result.
cmp xh, r4
mov yh, #0
blo 3f
@ Align divisor with upper part of dividend.
@ The aligned divisor is stored in yl preserving the original.
@ The bit position is stored in ip.
#if __LINUX_ARM_ARCH__ >= 5
clz yl, r4
clz ip, xh
sub yl, yl, ip
mov ip, #1
mov ip, ip, lsl yl
mov yl, r4, lsl yl
#else
mov yl, r4
mov ip, #1
1: cmp yl, #0x80000000
cmpcc yl, xh
movcc yl, yl, lsl #1
movcc ip, ip, lsl #1
bcc 1b
#endif
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
orrcs yh, yh, ip
subcss xh, xh, yl
movnes ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
cmpeq xl, r4
movlo xh, xl
movlo pc, lr
@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@ divisor for comparisons, considering the carry-out bit as well.
mov ip, #0x80000000
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
cmpcc xh, r4
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
mov pc, lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we're done.
6: bcs 5b
cmp xl, #0
moveq pc, lr
@ We still have remainer bits in the low part. Bring them up.
#if __LINUX_ARM_ARCH__ >= 5
clz xh, xl @ we know xh is zero here so...
add xh, xh, #1
mov xl, xl, lsl xh
mov ip, ip, lsr xh
#else
7: movs xl, xl, lsl #1
mov ip, ip, lsr #1
bcc 7b
#endif
@ Current remainder is now 1. It's worthless to compare with
@ divisor at this point since divisor can't be smaller than 3 here.
@ If possible, branch for another shift in the division loop.
@ If no bit position left then we're done.
movs ip, ip, lsr #1
mov xh, #1
bne 4b
mov pc, lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
#if __LINUX_ARM_ARCH__ >= 5
clz ip, r4
rsb ip, ip, #31
#else
mov yl, r4
cmp r4, #(1 << 16)
mov ip, #0
movhs yl, yl, lsr #16
movhs ip, #16
cmp yl, #(1 << 8)
movhs yl, yl, lsr #8
addhs ip, ip, #8
cmp yl, #(1 << 4)
movhs yl, yl, lsr #4
addhs ip, ip, #4
cmp yl, #(1 << 2)
addhi ip, ip, #3
addls ip, ip, yl, lsr #1
#endif
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
orr yl, yl, xh, lsl ip
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
@ Division by 0:
str lr, [sp, #-4]!
bl __div0
@ as wrong as it could be...
mov yl, #0
mov yh, #0
mov xh, #0
ldr pc, [sp], #4
#ifndef __ASM_ARM_DIV64
#define __ASM_ARM_DIV64
/* We're not 64-bit, but... */
/*
* The semantics of do_div() are:
*
* uint32_t do_div(uint64_t *n, uint32_t base)
* {
* uint32_t remainder = *n % base;
* *n = *n / base;
* return remainder;
* }
*
* In other words, a 64-bit dividend with a 32-bit divisor producing
* a 64-bit result and a 32-bit remainder. To accomplish this optimally
* we call a special __do_div64 helper with completely non standard
* calling convention for arguments and results (beware).
*/
#ifdef __ARMEB__
#define __xh "r0"
#define __xl "r1"
#else
#define __xl "r0"
#define __xh "r1"
#endif
#define do_div(n,base) \
({ \
register int __res asm("r2") = base; \
register unsigned int __base asm("r4") = base; \
register unsigned long long __n asm("r0") = n; \
asm("bl do_div64" \
: "=r" (__n), "=r" (__res) \
: "0" (__n), "1" (__res) \
: "r3", "ip", "lr", "cc"); \
n = __n; \
__res; \
register unsigned long long __res asm("r2"); \
register unsigned int __rem asm(__xh); \
asm("bl __do_div64" \
: "=r" (__rem), "=r" (__res) \
: "r" (__n), "r" (__base) \
: "ip", "lr", "cc"); \
n = __res; \
__rem; \
})
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment