Commit d46808dc authored by Greg Ungerer's avatar Greg Ungerer Committed by Linus Torvalds

[PATCH] m68knommu: fix calculation overflow in udelay() on fast CPU's

Re-work the udelay() function. On fast ColdFire CPUs we are
overflowing in the calculation and getting totally bogus
results.

On old 68k and ColdFire CPUs we have no 32*32->64 multiply
instruction. So we resort to a mul/shift calculation similar
to what ARM uses.
Signed-off-by: default avatarGreg Ungerer <gerg@snapgear.com>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent b21083fc
#ifndef _M68KNOMMU_DELAY_H #ifndef _M68KNOMMU_DELAY_H
#define _M68KNOMMU_DELAY_H #define _M68KNOMMU_DELAY_H
#include <asm/param.h>
/* /*
* Copyright (C) 1994 Hamish Macdonald * Copyright (C) 1994 Hamish Macdonald
* * Copyright (C) 2004 Greg Ungerer <gerg@snapgear.com>
* Delay routines, using a pre-computed "loops_per_second" value.
*/ */
#include <asm/param.h>
extern __inline__ void __delay(unsigned long loops) extern __inline__ void __delay(unsigned long loops)
{ {
#if defined(CONFIG_COLDFIRE) #if defined(CONFIG_COLDFIRE)
...@@ -34,35 +33,27 @@ extern __inline__ void __delay(unsigned long loops) ...@@ -34,35 +33,27 @@ extern __inline__ void __delay(unsigned long loops)
} }
/* /*
* Use only for very small delays ( < 1 msec). Should probably use a * Ideally we use a 32*32->64 multiply to calculate the number of
* lookup table, really, as the multiplications take much too long with * loop iterations, but the older standard 68k and ColdFire do not
* short delays. This is a "reasonable" implementation, though (and the * have this instruction. So for them we have a close approximation
* first constant multiplications gets optimized away if the delay is * loop using 32*32->32 multiplies only. This calculation based on
* a constant) * the ARM version of delay.
*
* We want to implement:
*
* loops = (usecs * 0x10c6 * HZ * loops_per_jiffy) / 2^32
*/ */
#define HZSCALE (268435456 / (1000000/HZ))
extern unsigned long loops_per_jiffy; extern unsigned long loops_per_jiffy;
extern __inline__ void udelay(unsigned long usecs) extern __inline__ void _udelay(unsigned long usecs)
{ {
#ifdef CONFIG_M68332 #if defined(CONFIG_M68328) || defined(CONFIG_M68EZ328) || \
usecs *= 0x000010c6; defined(CONFIG_M68VZ328) || defined(CONFIG_M68360) || \
__asm__ __volatile__ ("mulul %1,%0:%2" defined(CONFIG_COLDFIRE)
: "=d" (usecs) __delay((((usecs * HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6);
: "d" (usecs),
"d" (loops_per_jiffy*HZ));
__delay(usecs);
#elif defined(CONFIG_M68328) || defined(CONFIG_M68EZ328) || \
defined(CONFIG_COLDFIRE) || defined(CONFIG_M68360) || \
defined(CONFIG_M68VZ328)
register unsigned long full_loops, part_loops;
full_loops = ((usecs * HZ) / 1000000) * loops_per_jiffy;
usecs %= (1000000 / HZ);
part_loops = (usecs * HZ * loops_per_jiffy) / 1000000;
__delay(full_loops + part_loops);
#else #else
unsigned long tmp; unsigned long tmp;
...@@ -74,4 +65,12 @@ extern __inline__ void udelay(unsigned long usecs) ...@@ -74,4 +65,12 @@ extern __inline__ void udelay(unsigned long usecs)
#endif #endif
} }
/*
* Moved the udelay() function into library code, no longer inlined.
* I had to change the algorithm because we are overflowing now on
* the faster ColdFire parts. The code is a little bigger, so it makes
* sense to library it.
*/
extern void udelay(unsigned long usecs);
#endif /* defined(_M68KNOMMU_DELAY_H) */ #endif /* defined(_M68KNOMMU_DELAY_H) */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment