[NWFPE] performance improvements [Part 4]

Fourth part of NWFPE performance improvements, building on patch 1466/1. This patch makes 80-bit (extended precision) a compile-time option in NWFPE. The effect of the extra code path on 32/64 bit operations is not as significant as I had first thought - speedup is between 0.7 and 1.5% depending on which tree (rmk or nw) you do the changes in. However the reduction in code size, particularly when softfloat has the same functions cut out, may be of value on embedded systems.

[NWFPE] performance improvements [Part 4]
Fourth part of NWFPE performance improvements, building on patch 1466/1. This patch makes 80-bit (extended precision) a compile-time option in NWFPE. The effect of the extra code path on 32/64 bit operations is not as significant as I had first thought - speedup is between 0.7 and 1.5% depending on which tree (rmk or nw) you do the changes in. However the reduction in code size, particularly when softfloat has the same functions cut out, may be of value on embedded systems.
fb24dfcb · Ralph Siemsen · Russell King · a64e20ce · fb24dfcb · fb24dfcb
Commit fb24dfcb authored Mar 29, 2003 by Ralph Siemsen Committed by Russell King Mar 29, 2003
13 changed files
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -584,6 +584,18 @@ config FPE_NWFPE
 	  You may say N here if you are going to load the Acorn FPEmulator
 	  early in the bootup.

+config FPE_NWFPE_XP
+	bool "Support extended precision"
+	depends on FPE_NWFPE
+	help
+	  Say Y to include 80-bit (extended precision) support in the
+	  kernel floating-point emulator.  Otherwise, only 32-bit and
+	  64-bit support is compiled in.  Note that gcc does not generate
+	  80-bit operations by default, so in most cases this option only
+	  enlarges the floating-point emulator without any benefit.
+
+	  You almost surely want to say N here.
+
 config FPE_FASTFPE
 	tristate "FastFPE math emulation (EXPERIMENTAL)"
 	depends on !CPU_26 && !CPU_32v3 && EXPERIMENTAL

--- a/arch/arm/nwfpe/ChangeLog
+++ b/arch/arm/nwfpe/ChangeLog
@@ -3,6 +3,7 @@
 	  Used "indent -kr -i8 -ts8 -sob -l132 -ss" and a few manual fixups.
 	* Removed dead code and fixed function protypes to match definitions.
 	* Consolidated use of (opcode && MASK_ARITHMETIC_OPCODE) >> 20.
+	* Made 80-bit (extended) precision a compile-time option.

 2002-01-19  Russell King <rmk@arm.linux.org.uk>


--- a/arch/arm/nwfpe/Makefile
+++ b/arch/arm/nwfpe/Makefile
@@ -10,7 +10,11 @@ obj-$(CONFIG_FPE_NWFPE)	+= nwfpe.o

 nwfpe-objs		:= fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
 			   fpmodule.o fpopcode.o softfloat.o \
-			   single_cpdo.o double_cpdo.o extended_cpdo.o
+			   single_cpdo.o double_cpdo.o
+
+ifeq ($(CONFIG_FPE_NWFPE_XP),y)
+nwfpe-objs		+= extended_cpdo.o
+endif

 ifeq ($(CONFIG_CPU_26),y)
 nwfpe-objs		+= entry26.o

--- a/arch/arm/nwfpe/fpa11.c
+++ b/arch/arm/nwfpe/fpa11.c
 /*
    NetWinder Floating Point Emulator
    (c) Rebel.COM, 1998,1999
+    (c) Philip Blundell, 2001

    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>

@@ -73,6 +74,7 @@ void SetRoundingMode(const unsigned int opcode)

 void SetRoundingPrecision(const unsigned int opcode)
 {
+#ifdef CONFIG_FPE_NWFPE_XP
 	switch (opcode & MASK_ROUNDING_PRECISION) {
 	case ROUND_SINGLE:
 		floatx80_rounding_precision = 32;
@@ -89,6 +91,7 @@ void SetRoundingPrecision(const unsigned int opcode)
 	default:
 		floatx80_rounding_precision = 80;
 	}
+#endif
 }

 void nwfpe_init(union fp_state *fp)

--- a/arch/arm/nwfpe/fpa11.h
+++ b/arch/arm/nwfpe/fpa11.h
@@ -37,6 +37,7 @@ register unsigned int *user_registers asm("sl");

 /* includes */
 #include "fpsr.h"		/* FP control and status register definitions */
+#include "milieu.h"
 #include "softfloat.h"

 #define		typeNone		0x00
@@ -48,9 +49,13 @@ register unsigned int *user_registers asm("sl");
 * This must be no more and no less than 12 bytes.
 */
 typedef union tagFPREG {
-	floatx80 fExtended;
-	float64 fDouble;
 	float32 fSingle;
+	float64 fDouble;
+#ifdef CONFIG_FPE_NWFPE_XP	/* 80-bit member exists only when extended precision is configured */
+	floatx80 fExtended;
+#else	/* no 80-bit member compiled in */
+	int padding[3];	/* NOTE(review): presumably keeps the union at the 12 bytes required by the comment above; assumes 4-byte int */
+#endif /* CONFIG_FPE_NWFPE_XP */
 } FPREG;

 /*

--- a/arch/arm/nwfpe/fpa11_cpdo.c
+++ b/arch/arm/nwfpe/fpa11_cpdo.c
 /*
    NetWinder Floating Point Emulator
    (c) Rebel.COM, 1998,1999
+    (c) Philip Blundell, 2001

    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>

@@ -66,9 +67,11 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 	case typeDouble:
 		nRc = DoubleCPDO(opcode, rFd);
 		break;
+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		nRc = ExtendedCPDO(opcode, rFd);
 		break;
+#endif
 	default:
 		nRc = 0;
 	}
@@ -83,6 +86,7 @@ unsigned int EmulateCPDO(const unsigned int opcode)

 		fpa11->fType[getFd(opcode)] = nDest;

+#ifdef CONFIG_FPE_NWFPE_XP
 		if (nDest != nType) {
 			switch (nDest) {
 			case typeSingle:
@@ -113,6 +117,14 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 				break;
 			}
 		}
+#else
+		if (nDest != nType) {
+			if (nDest == typeSingle)
+				rFd->fSingle = float64_to_float32(rFd->fDouble);
+			else
+				rFd->fDouble = float32_to_float64(rFd->fSingle);
+		}
+#endif
 	}

 	return nRc;

--- a/arch/arm/nwfpe/fpa11_cpdt.c
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
 /*
    NetWinder Floating Point Emulator
    (c) Rebel.com, 1998-1999
-    (c) Philip Blundell, 1998
+    (c) Philip Blundell, 1998, 2001

    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>

@@ -45,6 +45,7 @@ static inline void loadDouble(const unsigned int Fn, const unsigned int *pMem)
 	get_user(p[1], &pMem[0]);	/* sign & exponent */
 }

+#ifdef CONFIG_FPE_NWFPE_XP
 static inline void loadExtended(const unsigned int Fn, const unsigned int *pMem)
 {
 	FPA11 *fpa11 = GET_FPA11();
@@ -55,6 +56,7 @@ static inline void loadExtended(const unsigned int Fn, const unsigned int *pMem)
 	get_user(p[1], &pMem[2]);	/* ls bits */
 	get_user(p[2], &pMem[1]);	/* ms bits */
 }
+#endif

 static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
 {
@@ -76,6 +78,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
 		}
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		{
 			get_user(p[1], &pMem[2]);
@@ -83,6 +86,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
 			p[0] = (x & 0x80003fff);
 		}
 		break;
+#endif
 	}
 }

@@ -99,9 +103,11 @@ static inline void storeSingle(const unsigned int Fn, unsigned int *pMem)
 		val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble);
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
 		break;
+#endif

 	default:
 		val.f = fpa11->fpreg[Fn].fSingle;
@@ -123,9 +129,11 @@ static inline void storeDouble(const unsigned int Fn, unsigned int *pMem)
 		val.f = float32_to_float64(fpa11->fpreg[Fn].fSingle);
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
 		break;
+#endif

 	default:
 		val.f = fpa11->fpreg[Fn].fDouble;
@@ -135,6 +143,7 @@ static inline void storeDouble(const unsigned int Fn, unsigned int *pMem)
 	put_user(val.i[0], &pMem[1]);	/* lsw */
 }

+#ifdef CONFIG_FPE_NWFPE_XP
 static inline void storeExtended(const unsigned int Fn, unsigned int *pMem)
 {
 	FPA11 *fpa11 = GET_FPA11();
@@ -160,6 +169,7 @@ static inline void storeExtended(const unsigned int Fn, unsigned int *pMem)
 	put_user(val.i[1], &pMem[2]);
 	put_user(val.i[2], &pMem[1]);	/* msw */
 }
+#endif

 static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
 {
@@ -179,6 +189,7 @@ static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
 		}
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		{
 			put_user(p[2], &pMem[1]);	/* msw */
@@ -186,6 +197,7 @@ static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
 			put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]);
 		}
 		break;
+#endif
 	}
 }

@@ -218,9 +230,11 @@ unsigned int PerformLDF(const unsigned int opcode)
 	case TRANSFER_DOUBLE:
 		loadDouble(getFd(opcode), pAddress);
 		break;
+#ifdef CONFIG_FPE_NWFPE_XP
 	case TRANSFER_EXTENDED:
 		loadExtended(getFd(opcode), pAddress);
 		break;
+#endif
 	default:
 		nRc = 0;
 	}
@@ -261,9 +275,11 @@ unsigned int PerformSTF(const unsigned int opcode)
 	case TRANSFER_DOUBLE:
 		storeDouble(getFd(opcode), pAddress);
 		break;
+#ifdef CONFIG_FPE_NWFPE_XP
 	case TRANSFER_EXTENDED:
 		storeExtended(getFd(opcode), pAddress);
 		break;
+#endif
 	default:
 		nRc = 0;
 	}

--- a/arch/arm/nwfpe/fpa11_cprt.c
+++ b/arch/arm/nwfpe/fpa11_cprt.c
 /*
    NetWinder Floating Point Emulator
    (c) Rebel.COM, 1998,1999
-    (c) Philip Blundell, 1999
+    (c) Philip Blundell, 1999, 2001

    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>

@@ -21,14 +21,14 @@
 */

 #include "fpa11.h"
-#include "milieu.h"
-#include "softfloat.h"
 #include "fpopcode.h"
 #include "fpa11.inl"
 #include "fpmodule.h"
 #include "fpmodule.inl"

+#ifdef CONFIG_FPE_NWFPE_XP
 extern flag floatx80_is_nan(floatx80);
+#endif
 extern flag float64_is_nan(float64);
 extern flag float32_is_nan(float32);

@@ -41,23 +41,22 @@ static unsigned int PerformComparison(const unsigned int opcode);

 unsigned int EmulateCPRT(const unsigned int opcode)
 {
-	unsigned int nRc = 1;

 	if (opcode & 0x800000) {
-		/* This is some variant of a comparison (PerformComparison will
-		   sort out which one).  Since most of the other CPRT
-		   instructions are oddball cases of some sort or other it makes
-		   sense to pull this out into a fast path.  */
+		/* This is some variant of a comparison (PerformComparison
+		   will sort out which one).  Since most of the other CPRT
+		   instructions are oddball cases of some sort or other it
+		   makes sense to pull this out into a fast path.  */
 		return PerformComparison(opcode);
 	}

 	/* Hint to GCC that we'd like a jump table rather than a load of CMPs */
 	switch ((opcode & 0x700000) >> 20) {
 	case FLT_CODE >> 20:
-		nRc = PerformFLT(opcode);
+		return PerformFLT(opcode);
 		break;
 	case FIX_CODE >> 20:
-		nRc = PerformFIX(opcode);
+		return PerformFIX(opcode);
 		break;

 	case WFS_CODE >> 20:
@@ -67,28 +66,18 @@ unsigned int EmulateCPRT(const unsigned int opcode)
 		writeRegister(getRd(opcode), readFPSR());
 		break;

-#if 0		/* We currently have no use for the FPCR, so there's no point
-				   in emulating it. */
-	case WFC_CODE >> 20:
-		writeFPCR(readRegister(getRd(opcode)));
-	case RFC_CODE >> 20:
-		writeRegister(getRd(opcode), readFPCR());
-		break;
-#endif
-
 	default:
-		nRc = 0;
+		return 0;
 	}

-	return nRc;
+	return 1;
 }

 unsigned int PerformFLT(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
-
-	unsigned int nRc = 1;
 	SetRoundingMode(opcode);
+	SetRoundingPrecision(opcode);

 	switch (opcode & MASK_ROUNDING_PRECISION) {
 	case ROUND_SINGLE:
@@ -105,24 +94,25 @@ unsigned int PerformFLT(const unsigned int opcode)
 		}
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case ROUND_EXTENDED:
 		{
 			fpa11->fType[getFn(opcode)] = typeExtended;
 			fpa11->fpreg[getFn(opcode)].fExtended = int32_to_floatx80(readRegister(getRd(opcode)));
 		}
 		break;
+#endif

 	default:
-		nRc = 0;
+		return 0;
 	}

-	return nRc;
+	return 1;
 }

 unsigned int PerformFIX(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
-	unsigned int nRc = 1;
 	unsigned int Fn = getFm(opcode);

 	SetRoundingMode(opcode);
@@ -140,39 +130,18 @@ unsigned int PerformFIX(const unsigned int opcode)
 		}
 		break;

+#ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		{
 			writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
 		}
 		break;
+#endif

 	default:
-		nRc = 0;
-	}
-
-	return nRc;
-}
-
-static unsigned int __inline__ PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
-{
-	unsigned int flags = 0;
-
-	/* test for less than condition */
-	if (floatx80_lt(Fn, Fm)) {
-		flags |= CC_NEGATIVE;
-	}
-
-	/* test for equal condition */
-	if (floatx80_eq(Fn, Fm)) {
-		flags |= CC_ZERO;
-	}
-
-	/* test for greater than or equal condition */
-	if (floatx80_lt(Fm, Fn)) {
-		flags |= CC_CARRY;
+		return 0;
 	}

-	writeConditionCodes(flags);
 	return 1;
 }

@@ -180,14 +149,13 @@ static unsigned int __inline__ PerformComparisonOperation(floatx80 Fn, floatx80
 static unsigned int PerformComparison(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
-	unsigned int Fn, Fm;
-	floatx80 rFn, rFm;
+	unsigned int Fn = getFn(opcode), Fm = getFm(opcode);
 	int e_flag = opcode & 0x400000;	/* 1 if CxFE */
 	int n_flag = opcode & 0x200000;	/* 1 if CNxx */
 	unsigned int flags = 0;

-	Fn = getFn(opcode);
-	Fm = getFm(opcode);
+#ifdef CONFIG_FPE_NWFPE_XP
+	floatx80 rFn, rFm;

 	/* Check for unordered condition and convert all operands to 80-bit
 	   format.
@@ -254,11 +222,131 @@ static unsigned int PerformComparison(const unsigned int opcode)
 		}
 	}

-	if (n_flag) {
+	if (n_flag)
 		rFm.high ^= 0x8000;
+
+	/* test for less than condition */
+	if (floatx80_lt(rFn, rFm))
+		flags |= CC_NEGATIVE;
+
+	/* test for equal condition */
+	if (floatx80_eq(rFn, rFm))
+		flags |= CC_ZERO;
+
+	/* test for greater than or equal condition */
+	if (floatx80_lt(rFm, rFn))
+		flags |= CC_CARRY;
+
+#else
+	if (CONSTANT_FM(opcode)) {
+		/* Fm is a constant.  Do the comparison in whatever precision
+		   Fn happens to be stored in.  */
+		if (fpa11->fType[Fn] == typeSingle) {
+			float32 rFm = getSingleConstant(Fm);
+			float32 rFn = fpa11->fpreg[Fn].fSingle;
+
+			if (float32_is_nan(rFn))
+				goto unordered;
+
+			if (n_flag)
+				rFm ^= 0x80000000;
+
+			/* test for less than condition */
+			if (float32_lt_nocheck(rFn, rFm))
+				flags |= CC_NEGATIVE;
+
+			/* test for equal condition */
+			if (float32_eq_nocheck(rFn, rFm))
+				flags |= CC_ZERO;
+
+			/* test for greater than or equal condition */
+			if (float32_lt_nocheck(rFm, rFn))
+				flags |= CC_CARRY;
+		} else {
+			float64 rFm = getDoubleConstant(Fm);
+			float64 rFn = fpa11->fpreg[Fn].fDouble;
+
+			if (float64_is_nan(rFn))
+				goto unordered;
+
+			if (n_flag)
+				rFm ^= 0x8000000000000000ULL;
+
+			/* test for less than condition */
+			if (float64_lt_nocheck(rFn, rFm))
+				flags |= CC_NEGATIVE;
+
+			/* test for equal condition */
+			if (float64_eq_nocheck(rFn, rFm))
+				flags |= CC_ZERO;
+
+			/* test for greater than or equal condition */
+			if (float64_lt_nocheck(rFm, rFn))
+				flags |= CC_CARRY;
+		}
+	} else {
+		/* Both operands are in registers.  */
+		if (fpa11->fType[Fn] == typeSingle
+		    && fpa11->fType[Fm] == typeSingle) {
+			float32 rFm = fpa11->fpreg[Fm].fSingle;
+			float32 rFn = fpa11->fpreg[Fn].fSingle;
+
+			if (float32_is_nan(rFn)
+			    || float32_is_nan(rFm))
+				goto unordered;
+
+			if (n_flag)
+				rFm ^= 0x80000000;
+
+			/* test for less than condition */
+			if (float32_lt_nocheck(rFn, rFm))
+				flags |= CC_NEGATIVE;
+
+			/* test for equal condition */
+			if (float32_eq_nocheck(rFn, rFm))
+				flags |= CC_ZERO;
+
+			/* test for greater than or equal condition */
+			if (float32_lt_nocheck(rFm, rFn))
+				flags |= CC_CARRY;
+		} else {
+			/* Promote 32-bit operand to 64 bits.  */
+			float64 rFm, rFn;
+
+			rFm = (fpa11->fType[Fm] == typeSingle) ?
+			    float32_to_float64(fpa11->fpreg[Fm].fSingle)
+			    : fpa11->fpreg[Fm].fDouble;
+
+			rFn = (fpa11->fType[Fn] == typeSingle) ?
+			    float32_to_float64(fpa11->fpreg[Fn].fSingle)
+			    : fpa11->fpreg[Fn].fDouble;
+
+			if (float64_is_nan(rFn)
+			    || float64_is_nan(rFm))
+				goto unordered;
+
+			if (n_flag)
+				rFm ^= 0x8000000000000000ULL;
+
+			/* test for less than condition */
+			if (float64_lt_nocheck(rFn, rFm))
+				flags |= CC_NEGATIVE;
+
+			/* test for equal condition */
+			if (float64_eq_nocheck(rFn, rFm))
+				flags |= CC_ZERO;
+
+			/* test for greater than or equal condition */
+			if (float64_lt_nocheck(rFm, rFn))
+				flags |= CC_CARRY;
+		}
 	}

-	return PerformComparisonOperation(rFn, rFm);
+#endif
+
+	writeConditionCodes(flags);
+
+	return 1;

      unordered:
 	/* ?? The FPA data sheet is pretty vague about this, in particular

--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -42,11 +42,17 @@
 #include "fpa11.inl"

 /* kernel symbols required for signal handling */
+#ifdef CONFIG_FPE_NWFPE_XP	/* NWFPE_BITS: precision name spliced into the module description and the init banner */
+#define NWFPE_BITS "extended"
+#else	/* built without 80-bit support */
+#define NWFPE_BITS "double"
+#endif /* CONFIG_FPE_NWFPE_XP */
+
 #ifdef MODULE
 void fp_send_sig(unsigned long sig, struct task_struct *p, int priv);
 #if LINUX_VERSION_CODE > 0x20115
 MODULE_AUTHOR("Scott Bambrough <scottb@rebel.com>");
-MODULE_DESCRIPTION("NWFPE floating point emulator");
+MODULE_DESCRIPTION("NWFPE floating point emulator (" NWFPE_BITS " precision)");
 #endif

 #else
@@ -85,8 +91,8 @@ static int __init fpe_init(void)
 		return 0;

 	/* Display title, version and copyright information. */
-	printk(KERN_WARNING "NetWinder Floating Point Emulator V0.95 "
-			    "(c) 1998-1999 Rebel.com\n");
+	printk(KERN_WARNING "NetWinder Floating Point Emulator V0.97 ("
+	       NWFPE_BITS " precision)\n");

 	/* Save pointer to the old FP handler and then patch ourselves in */
 	orig_fp_enter = kern_fp_enter;

--- a/arch/arm/nwfpe/fpopcode.c
+++ b/arch/arm/nwfpe/fpopcode.c
@@ -26,6 +26,7 @@
 #include "fpmodule.h"
 #include "fpmodule.inl"

+#ifdef CONFIG_FPE_NWFPE_XP
 const floatx80 floatx80Constant[] = {
 	{0x0000, 0x0000000000000000ULL},	/* extended 0.0 */
 	{0x3fff, 0x8000000000000000ULL},	/* extended 1.0 */
@@ -36,6 +37,7 @@ const floatx80 floatx80Constant[] = {
 	{0x3ffe, 0x8000000000000000ULL},	/* extended 0.5 */
 	{0x4002, 0xa000000000000000ULL}		/* extended 10.0 */
 };
+#endif

 const float64 float64Constant[] = {
 	0x0000000000000000ULL,	/* double 0.0 */

--- a/arch/arm/nwfpe/fpopcode.h
+++ b/arch/arm/nwfpe/fpopcode.h
 /*
    NetWinder Floating Point Emulator
    (c) Rebel.COM, 1998,1999
+    (c) Philip Blundell, 2001

    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>

@@ -366,11 +367,13 @@ TABLE 5
 /* Get the rounding mode from the opcode. */
 #define getRoundingMode(opcode)		((opcode & MASK_ROUNDING_MODE) >> 5)

+#ifdef CONFIG_FPE_NWFPE_XP	/* only built with 80-bit (extended precision) support */
 static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
 {
 	extern const floatx80 floatx80Constant[];
 	return floatx80Constant[nIndex];
 }
+#endif /* CONFIG_FPE_NWFPE_XP */

 static inline const float64 getDoubleConstant(const unsigned int nIndex)
 {

--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -29,8 +29,8 @@ this code that are retained.
 */

 #include "fpa11.h"
-#include "milieu.h"
-#include "softfloat.h"
+/* "milieu.h" and "softfloat.h" are no longer included directly; */
+/* fpa11.h (included above) now pulls in both. */

 /*
 -------------------------------------------------------------------------------
@@ -142,12 +142,14 @@ INLINE int16 extractFloat32Exp( float32 a )
 Returns the sign bit of the single-precision floating-point value `a'.
 -------------------------------------------------------------------------------
 */
+#if 0	/* disabled here: now defined as a static inline in softfloat.h */
 INLINE flag extractFloat32Sign( float32 a )
 {

    return a>>31;

 }
+#endif /* 0: moved to softfloat.h */

 /*
 -------------------------------------------------------------------------------
@@ -321,12 +323,14 @@ INLINE int16 extractFloat64Exp( float64 a )
 Returns the sign bit of the double-precision floating-point value `a'.
 -------------------------------------------------------------------------------
 */
+#if 0	/* disabled here: now defined as a static inline in softfloat.h */
 INLINE flag extractFloat64Sign( float64 a )
 {

    return a>>63;

 }
+#endif /* 0: moved to softfloat.h */

 /*
 -------------------------------------------------------------------------------

--- a/arch/arm/nwfpe/softfloat.h
+++ b/arch/arm/nwfpe/softfloat.h
@@ -40,7 +40,9 @@ floating-point format `floatx80'.  If this macro is not defined, the
 input or output the `floatx80' type will be defined.
 -------------------------------------------------------------------------------
 */
+#ifdef CONFIG_FPE_NWFPE_XP
 #define FLOATX80
+#endif

 /*
 -------------------------------------------------------------------------------
@@ -229,4 +231,46 @@ char floatx80_is_signaling_nan( floatx80 );

 #endif

+static inline flag extractFloat32Sign(float32 a)	/* returns the sign bit (bit 31) of the single-precision value a */
+{
+	return a >> 31;
+}
+
+static inline flag float32_eq_nocheck(float32 a, float32 b)	/* nonzero if a equals b; performs no NaN test of its own */
+{
+	return (a == b) || ((bits32) ((a | b) << 1) == 0);	/* identical bits, or both zero once bit 31 is shifted out (+0 vs -0); assumes bits32 is 32 bits wide */
+}
+
+static inline flag float32_lt_nocheck(float32 a, float32 b)	/* nonzero if a is less than b; performs no NaN test of its own */
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat32Sign(a);
+	bSign = extractFloat32Sign(b);
+	if (aSign != bSign)	/* signs differ: a < b only when a carries the sign bit and the two are not both zero */
+		return aSign && ((bits32) ((a | b) << 1) != 0);
+	return (a != b) && (aSign ^ (a < b));	/* same sign: raw integer ordering, inverted when the sign bit is set */
+}
+
+static inline flag extractFloat64Sign(float64 a)	/* returns the sign bit (bit 63) of the double-precision value a */
+{
+	return a >> 63;
+}
+
+static inline flag float64_eq_nocheck(float64 a, float64 b)	/* nonzero if a equals b; performs no NaN test of its own */
+{
+	return (a == b) || ((bits64) ((a | b) << 1) == 0);	/* identical bits, or both zero once bit 63 is shifted out (+0 vs -0); assumes bits64 is 64 bits wide */
+}
+
+static inline flag float64_lt_nocheck(float64 a, float64 b)	/* nonzero if a is less than b; performs no NaN test of its own */
+{
+	flag aSign, bSign;
+
+	aSign = extractFloat64Sign(a);
+	bSign = extractFloat64Sign(b);
+	if (aSign != bSign)	/* signs differ: a < b only when a carries the sign bit and the two are not both zero */
+		return aSign && ((bits64) ((a | b) << 1) != 0);
+	return (a != b) && (aSign ^ (a < b));	/* same sign: raw integer ordering, inverted when the sign bit is set */
+}
+
 #endif