Commit d46808dc authored by Greg Ungerer's avatar Greg Ungerer Committed by Linus Torvalds

[PATCH] m68knommu: fix calculation overflow in udelay() on fast CPU's

Re-work the udelay() function. On fast ColdFire CPUs we are
overflowing in the calculation and getting totally bogus
results.

On old 68k and ColdFire CPUs we have no 32*32->64 multiply
instruction. So we resort to a mul/shift calculation similar
to what ARM uses.
Signed-off-by: default avatarGreg Ungerer <gerg@snapgear.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent b21083fc
#ifndef _M68KNOMMU_DELAY_H #ifndef _M68KNOMMU_DELAY_H
#define _M68KNOMMU_DELAY_H #define _M68KNOMMU_DELAY_H
#include <asm/param.h>
/* /*
* Copyright (C) 1994 Hamish Macdonald * Copyright (C) 1994 Hamish Macdonald
* * Copyright (C) 2004 Greg Ungerer <gerg@snapgear.com>
* Delay routines, using a pre-computed "loops_per_second" value.
*/ */
#include <asm/param.h>
extern __inline__ void __delay(unsigned long loops) extern __inline__ void __delay(unsigned long loops)
{ {
#if defined(CONFIG_COLDFIRE) #if defined(CONFIG_COLDFIRE)
...@@ -34,35 +33,27 @@ extern __inline__ void __delay(unsigned long loops) ...@@ -34,35 +33,27 @@ extern __inline__ void __delay(unsigned long loops)
} }
/* /*
* Use only for very small delays ( < 1 msec). Should probably use a * Ideally we use a 32*32->64 multiply to calculate the number of
* lookup table, really, as the multiplications take much too long with * loop iterations, but the older standard 68k and ColdFire do not
* short delays. This is a "reasonable" implementation, though (and the * have this instruction. So for them we have a close approximation
* first constant multiplications gets optimized away if the delay is * loop using 32*32->32 multiplies only. This calculation based on
* a constant) * the ARM version of delay.
*
* We want to implement:
*
* loops = (usecs * 0x10c6 * HZ * loops_per_jiffy) / 2^32
*/ */
#define HZSCALE (268435456 / (1000000/HZ))
extern unsigned long loops_per_jiffy; extern unsigned long loops_per_jiffy;
extern __inline__ void udelay(unsigned long usecs) extern __inline__ void _udelay(unsigned long usecs)
{ {
#ifdef CONFIG_M68332 #if defined(CONFIG_M68328) || defined(CONFIG_M68EZ328) || \
usecs *= 0x000010c6; defined(CONFIG_M68VZ328) || defined(CONFIG_M68360) || \
__asm__ __volatile__ ("mulul %1,%0:%2" defined(CONFIG_COLDFIRE)
: "=d" (usecs) __delay((((usecs * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6);
: "d" (usecs),
"d" (loops_per_jiffy*HZ));
__delay(usecs);
#elif defined(CONFIG_M68328) || defined(CONFIG_M68EZ328) || \
defined(CONFIG_COLDFIRE) || defined(CONFIG_M68360) || \
defined(CONFIG_M68VZ328)
register unsigned long full_loops, part_loops;
full_loops = ((usecs * HZ) / 1000000) * loops_per_jiffy;
usecs %= (1000000 / HZ);
part_loops = (usecs * HZ * loops_per_jiffy) / 1000000;
__delay(full_loops + part_loops);
#else #else
unsigned long tmp; unsigned long tmp;
...@@ -74,4 +65,12 @@ extern __inline__ void udelay(unsigned long usecs) ...@@ -74,4 +65,12 @@ extern __inline__ void udelay(unsigned long usecs)
#endif #endif
} }
/*
* Moved the udelay() function into library code, no longer inlined.
* I had to change the algorithm because we are overflowing now on
* the faster ColdFire parts. The code is a little bigger, so it makes
* sense to library it.
*/
extern void udelay(unsigned long usecs);
#endif /* defined(_M68KNOMMU_DELAY_H) */ #endif /* defined(_M68KNOMMU_DELAY_H) */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment