Commit fb24dfcb authored by Ralph Siemsen, committed by Russell King

[NWFPE] performance improvements [Part 4]

Fourth part of NWFPE performance improvements, building on patch 1466/1.
 
This patch makes 80-bit (extended precision) a compile-time option in NWFPE.
The effect of the extra code path on 32/64 bit operations is not as significant
as I had first thought - speedup is between 0.7 and 1.5% depending on which
tree (rmk or nw) you do the changes in.  However the reduction in code size,
particularly when softfloat has the same functions cut out, may be of value
on embedded systems.
parent a64e20ce
......@@ -584,6 +584,18 @@ config FPE_NWFPE
You may say N here if you are going to load the Acorn FPEmulator
early in the bootup.
config FPE_NWFPE_XP
bool "Support extended precision"
depends on FPE_NWFPE
help
Say Y to include 80-bit support in the kernel floating-point
emulator. Otherwise, only 32 and 64-bit support is compiled in.
Note that gcc does not generate 80-bit operations by default,
so in most cases this option only enlarges the size of the
floating point emulator without any good reason.
You almost surely want to say N here.
config FPE_FASTFPE
tristate "FastFPE math emulation (EXPERIMENTAL)"
depends on !CPU_26 && !CPU_32v3 && EXPERIMENTAL
......
......@@ -3,6 +3,7 @@
Used "indent -kr -i8 -ts8 -sob -l132 -ss" and a few manual fixups.
* Removed dead code and fixed function prototypes to match definitions.
* Consolidated use of (opcode & MASK_ARITHMETIC_OPCODE) >> 20.
* Make 80-bit precision a compile-time option.
2002-01-19 Russell King <rmk@arm.linux.org.uk>
......
......@@ -10,7 +10,11 @@ obj-$(CONFIG_FPE_NWFPE) += nwfpe.o
nwfpe-objs := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
fpmodule.o fpopcode.o softfloat.o \
single_cpdo.o double_cpdo.o extended_cpdo.o
single_cpdo.o double_cpdo.o
ifeq ($(CONFIG_FPE_NWFPE_XP),y)
nwfpe-objs += extended_cpdo.o
endif
ifeq ($(CONFIG_CPU_26),y)
nwfpe-objs += entry26.o
......
/*
NetWinder Floating Point Emulator
(c) Rebel.COM, 1998,1999
(c) Philip Blundell, 2001
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
......@@ -73,6 +74,7 @@ void SetRoundingMode(const unsigned int opcode)
void SetRoundingPrecision(const unsigned int opcode)
{
#ifdef CONFIG_FPE_NWFPE_XP
switch (opcode & MASK_ROUNDING_PRECISION) {
case ROUND_SINGLE:
floatx80_rounding_precision = 32;
......@@ -89,6 +91,7 @@ void SetRoundingPrecision(const unsigned int opcode)
default:
floatx80_rounding_precision = 80;
}
#endif
}
void nwfpe_init(union fp_state *fp)
......
......@@ -37,6 +37,7 @@ register unsigned int *user_registers asm("sl");
/* includes */
#include "fpsr.h" /* FP control and status register definitions */
#include "milieu.h"
#include "softfloat.h"
#define typeNone 0x00
......@@ -48,9 +49,13 @@ register unsigned int *user_registers asm("sl");
* This must be no more and no less than 12 bytes.
*/
typedef union tagFPREG {
floatx80 fExtended;
float64 fDouble;
float32 fSingle;
float64 fDouble;
#ifdef CONFIG_FPE_NWFPE_XP
floatx80 fExtended;
#else
int padding[3];
#endif
} FPREG;
/*
......
/*
NetWinder Floating Point Emulator
(c) Rebel.COM, 1998,1999
(c) Philip Blundell, 2001
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
......@@ -66,9 +67,11 @@ unsigned int EmulateCPDO(const unsigned int opcode)
case typeDouble:
nRc = DoubleCPDO(opcode, rFd);
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
nRc = ExtendedCPDO(opcode, rFd);
break;
#endif
default:
nRc = 0;
}
......@@ -83,6 +86,7 @@ unsigned int EmulateCPDO(const unsigned int opcode)
fpa11->fType[getFd(opcode)] = nDest;
#ifdef CONFIG_FPE_NWFPE_XP
if (nDest != nType) {
switch (nDest) {
case typeSingle:
......@@ -113,6 +117,14 @@ unsigned int EmulateCPDO(const unsigned int opcode)
break;
}
}
#else
if (nDest != nType) {
if (nDest == typeSingle)
rFd->fSingle = float64_to_float32(rFd->fDouble);
else
rFd->fDouble = float32_to_float64(rFd->fSingle);
}
#endif
}
return nRc;
......
/*
NetWinder Floating Point Emulator
(c) Rebel.com, 1998-1999
(c) Philip Blundell, 1998
(c) Philip Blundell, 1998, 2001
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
......@@ -45,6 +45,7 @@ static inline void loadDouble(const unsigned int Fn, const unsigned int *pMem)
get_user(p[1], &pMem[0]); /* sign & exponent */
}
#ifdef CONFIG_FPE_NWFPE_XP
static inline void loadExtended(const unsigned int Fn, const unsigned int *pMem)
{
FPA11 *fpa11 = GET_FPA11();
......@@ -55,6 +56,7 @@ static inline void loadExtended(const unsigned int Fn, const unsigned int *pMem)
get_user(p[1], &pMem[2]); /* ls bits */
get_user(p[2], &pMem[1]); /* ms bits */
}
#endif
static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
{
......@@ -76,6 +78,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
}
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
{
get_user(p[1], &pMem[2]);
......@@ -83,6 +86,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int *pMem)
p[0] = (x & 0x80003fff);
}
break;
#endif
}
}
......@@ -99,9 +103,11 @@ static inline void storeSingle(const unsigned int Fn, unsigned int *pMem)
val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble);
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
break;
#endif
default:
val.f = fpa11->fpreg[Fn].fSingle;
......@@ -123,9 +129,11 @@ static inline void storeDouble(const unsigned int Fn, unsigned int *pMem)
val.f = float32_to_float64(fpa11->fpreg[Fn].fSingle);
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
break;
#endif
default:
val.f = fpa11->fpreg[Fn].fDouble;
......@@ -135,6 +143,7 @@ static inline void storeDouble(const unsigned int Fn, unsigned int *pMem)
put_user(val.i[0], &pMem[1]); /* lsw */
}
#ifdef CONFIG_FPE_NWFPE_XP
static inline void storeExtended(const unsigned int Fn, unsigned int *pMem)
{
FPA11 *fpa11 = GET_FPA11();
......@@ -160,6 +169,7 @@ static inline void storeExtended(const unsigned int Fn, unsigned int *pMem)
put_user(val.i[1], &pMem[2]);
put_user(val.i[2], &pMem[1]); /* msw */
}
#endif
static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
{
......@@ -179,6 +189,7 @@ static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
}
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
{
put_user(p[2], &pMem[1]); /* msw */
......@@ -186,6 +197,7 @@ static inline void storeMultiple(const unsigned int Fn, unsigned int *pMem)
put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]);
}
break;
#endif
}
}
......@@ -218,9 +230,11 @@ unsigned int PerformLDF(const unsigned int opcode)
case TRANSFER_DOUBLE:
loadDouble(getFd(opcode), pAddress);
break;
#ifdef CONFIG_FPE_NWFPE_XP
case TRANSFER_EXTENDED:
loadExtended(getFd(opcode), pAddress);
break;
#endif
default:
nRc = 0;
}
......@@ -261,9 +275,11 @@ unsigned int PerformSTF(const unsigned int opcode)
case TRANSFER_DOUBLE:
storeDouble(getFd(opcode), pAddress);
break;
#ifdef CONFIG_FPE_NWFPE_XP
case TRANSFER_EXTENDED:
storeExtended(getFd(opcode), pAddress);
break;
#endif
default:
nRc = 0;
}
......
/*
NetWinder Floating Point Emulator
(c) Rebel.COM, 1998,1999
(c) Philip Blundell, 1999
(c) Philip Blundell, 1999, 2001
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
......@@ -21,14 +21,14 @@
*/
#include "fpa11.h"
#include "milieu.h"
#include "softfloat.h"
#include "fpopcode.h"
#include "fpa11.inl"
#include "fpmodule.h"
#include "fpmodule.inl"
#ifdef CONFIG_FPE_NWFPE_XP
extern flag floatx80_is_nan(floatx80);
#endif
extern flag float64_is_nan(float64);
extern flag float32_is_nan(float32);
......@@ -41,23 +41,22 @@ static unsigned int PerformComparison(const unsigned int opcode);
unsigned int EmulateCPRT(const unsigned int opcode)
{
unsigned int nRc = 1;
if (opcode & 0x800000) {
/* This is some variant of a comparison (PerformComparison will
sort out which one). Since most of the other CPRT
instructions are oddball cases of some sort or other it makes
sense to pull this out into a fast path. */
/* This is some variant of a comparison (PerformComparison
will sort out which one). Since most of the other CPRT
instructions are oddball cases of some sort or other it
makes sense to pull this out into a fast path. */
return PerformComparison(opcode);
}
/* Hint to GCC that we'd like a jump table rather than a load of CMPs */
switch ((opcode & 0x700000) >> 20) {
case FLT_CODE >> 20:
nRc = PerformFLT(opcode);
return PerformFLT(opcode);
break;
case FIX_CODE >> 20:
nRc = PerformFIX(opcode);
return PerformFIX(opcode);
break;
case WFS_CODE >> 20:
......@@ -67,28 +66,18 @@ unsigned int EmulateCPRT(const unsigned int opcode)
writeRegister(getRd(opcode), readFPSR());
break;
#if 0 /* We currently have no use for the FPCR, so there's no point
in emulating it. */
case WFC_CODE >> 20:
writeFPCR(readRegister(getRd(opcode)));
case RFC_CODE >> 20:
writeRegister(getRd(opcode), readFPCR());
break;
#endif
default:
nRc = 0;
return 0;
}
return nRc;
return 1;
}
unsigned int PerformFLT(const unsigned int opcode)
{
FPA11 *fpa11 = GET_FPA11();
unsigned int nRc = 1;
SetRoundingMode(opcode);
SetRoundingPrecision(opcode);
switch (opcode & MASK_ROUNDING_PRECISION) {
case ROUND_SINGLE:
......@@ -105,24 +94,25 @@ unsigned int PerformFLT(const unsigned int opcode)
}
break;
#ifdef CONFIG_FPE_NWFPE_XP
case ROUND_EXTENDED:
{
fpa11->fType[getFn(opcode)] = typeExtended;
fpa11->fpreg[getFn(opcode)].fExtended = int32_to_floatx80(readRegister(getRd(opcode)));
}
break;
#endif
default:
nRc = 0;
return 0;
}
return nRc;
return 1;
}
unsigned int PerformFIX(const unsigned int opcode)
{
FPA11 *fpa11 = GET_FPA11();
unsigned int nRc = 1;
unsigned int Fn = getFm(opcode);
SetRoundingMode(opcode);
......@@ -140,39 +130,18 @@ unsigned int PerformFIX(const unsigned int opcode)
}
break;
#ifdef CONFIG_FPE_NWFPE_XP
case typeExtended:
{
writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
}
break;
#endif
default:
nRc = 0;
}
return nRc;
}
static unsigned int __inline__ PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
{
unsigned int flags = 0;
/* test for less than condition */
if (floatx80_lt(Fn, Fm)) {
flags |= CC_NEGATIVE;
}
/* test for equal condition */
if (floatx80_eq(Fn, Fm)) {
flags |= CC_ZERO;
}
/* test for greater than or equal condition */
if (floatx80_lt(Fm, Fn)) {
flags |= CC_CARRY;
return 0;
}
writeConditionCodes(flags);
return 1;
}
......@@ -180,14 +149,13 @@ static unsigned int __inline__ PerformComparisonOperation(floatx80 Fn, floatx80
static unsigned int PerformComparison(const unsigned int opcode)
{
FPA11 *fpa11 = GET_FPA11();
unsigned int Fn, Fm;
floatx80 rFn, rFm;
unsigned int Fn = getFn(opcode), Fm = getFm(opcode);
int e_flag = opcode & 0x400000; /* 1 if CxFE */
int n_flag = opcode & 0x200000; /* 1 if CNxx */
unsigned int flags = 0;
Fn = getFn(opcode);
Fm = getFm(opcode);
#ifdef CONFIG_FPE_NWFPE_XP
floatx80 rFn, rFm;
/* Check for unordered condition and convert all operands to 80-bit
format.
......@@ -254,11 +222,131 @@ static unsigned int PerformComparison(const unsigned int opcode)
}
}
if (n_flag) {
if (n_flag)
rFm.high ^= 0x8000;
/* test for less than condition */
if (floatx80_lt(rFn, rFm))
flags |= CC_NEGATIVE;
/* test for equal condition */
if (floatx80_eq(rFn, rFm))
flags |= CC_ZERO;
/* test for greater than or equal condition */
if (floatx80_lt(rFm, rFn))
flags |= CC_CARRY;
#else
if (CONSTANT_FM(opcode)) {
/* Fm is a constant. Do the comparison in whatever precision
Fn happens to be stored in. */
if (fpa11->fType[Fn] == typeSingle) {
float32 rFm = getSingleConstant(Fm);
float32 rFn = fpa11->fpreg[Fn].fSingle;
if (float32_is_nan(rFn))
goto unordered;
if (n_flag)
rFm ^= 0x80000000;
/* test for less than condition */
if (float32_lt_nocheck(rFn, rFm))
flags |= CC_NEGATIVE;
/* test for equal condition */
if (float32_eq_nocheck(rFn, rFm))
flags |= CC_ZERO;
/* test for greater than or equal condition */
if (float32_lt_nocheck(rFm, rFn))
flags |= CC_CARRY;
} else {
float64 rFm = getDoubleConstant(Fm);
float64 rFn = fpa11->fpreg[Fn].fDouble;
if (float64_is_nan(rFn))
goto unordered;
if (n_flag)
rFm ^= 0x8000000000000000ULL;
/* test for less than condition */
if (float64_lt_nocheck(rFn, rFm))
flags |= CC_NEGATIVE;
/* test for equal condition */
if (float64_eq_nocheck(rFn, rFm))
flags |= CC_ZERO;
/* test for greater than or equal condition */
if (float64_lt_nocheck(rFm, rFn))
flags |= CC_CARRY;
}
} else {
/* Both operands are in registers. */
if (fpa11->fType[Fn] == typeSingle
&& fpa11->fType[Fm] == typeSingle) {
float32 rFm = fpa11->fpreg[Fm].fSingle;
float32 rFn = fpa11->fpreg[Fn].fSingle;
if (float32_is_nan(rFn)
|| float32_is_nan(rFm))
goto unordered;
if (n_flag)
rFm ^= 0x80000000;
/* test for less than condition */
if (float32_lt_nocheck(rFn, rFm))
flags |= CC_NEGATIVE;
/* test for equal condition */
if (float32_eq_nocheck(rFn, rFm))
flags |= CC_ZERO;
/* test for greater than or equal condition */
if (float32_lt_nocheck(rFm, rFn))
flags |= CC_CARRY;
} else {
/* Promote 32-bit operand to 64 bits. */
float64 rFm, rFn;
rFm = (fpa11->fType[Fm] == typeSingle) ?
float32_to_float64(fpa11->fpreg[Fm].fSingle)
: fpa11->fpreg[Fm].fDouble;
rFn = (fpa11->fType[Fn] == typeSingle) ?
float32_to_float64(fpa11->fpreg[Fn].fSingle)
: fpa11->fpreg[Fn].fDouble;
if (float64_is_nan(rFn)
|| float64_is_nan(rFm))
goto unordered;
if (n_flag)
rFm ^= 0x8000000000000000ULL;
/* test for less than condition */
if (float64_lt_nocheck(rFn, rFm))
flags |= CC_NEGATIVE;
/* test for equal condition */
if (float64_eq_nocheck(rFn, rFm))
flags |= CC_ZERO;
/* test for greater than or equal condition */
if (float64_lt_nocheck(rFm, rFn))
flags |= CC_CARRY;
}
}
return PerformComparisonOperation(rFn, rFm);
#endif
writeConditionCodes(flags);
return 1;
unordered:
/* ?? The FPA data sheet is pretty vague about this, in particular
......
......@@ -42,11 +42,17 @@
#include "fpa11.inl"
/* kernel symbols required for signal handling */
#ifdef CONFIG_FPE_NWFPE_XP
#define NWFPE_BITS "extended"
#else
#define NWFPE_BITS "double"
#endif
#ifdef MODULE
void fp_send_sig(unsigned long sig, struct task_struct *p, int priv);
#if LINUX_VERSION_CODE > 0x20115
MODULE_AUTHOR("Scott Bambrough <scottb@rebel.com>");
MODULE_DESCRIPTION("NWFPE floating point emulator");
MODULE_DESCRIPTION("NWFPE floating point emulator (" NWFPE_BITS " precision)");
#endif
#else
......@@ -85,8 +91,8 @@ static int __init fpe_init(void)
return 0;
/* Display title, version and copyright information. */
printk(KERN_WARNING "NetWinder Floating Point Emulator V0.95 "
"(c) 1998-1999 Rebel.com\n");
printk(KERN_WARNING "NetWinder Floating Point Emulator V0.97 ("
NWFPE_BITS " precision)\n");
/* Save pointer to the old FP handler and then patch ourselves in */
orig_fp_enter = kern_fp_enter;
......
......@@ -26,6 +26,7 @@
#include "fpmodule.h"
#include "fpmodule.inl"
#ifdef CONFIG_FPE_NWFPE_XP
const floatx80 floatx80Constant[] = {
{0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
{0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
......@@ -36,6 +37,7 @@ const floatx80 floatx80Constant[] = {
{0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
{0x4002, 0xa000000000000000ULL} /* extended 10.0 */
};
#endif
const float64 float64Constant[] = {
0x0000000000000000ULL, /* double 0.0 */
......
/*
NetWinder Floating Point Emulator
(c) Rebel.COM, 1998,1999
(c) Philip Blundell, 2001
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
......@@ -366,11 +367,13 @@ TABLE 5
/* Get the rounding mode from the opcode. */
#define getRoundingMode(opcode) ((opcode & MASK_ROUNDING_MODE) >> 5)
#ifdef CONFIG_FPE_NWFPE_XP
/*
 * Fetch entry `nIndex' from the floatx80Constant table of extended
 * precision constants.  The index is used as-is; no range check is
 * performed here.
 */
static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
{
	extern const floatx80 floatx80Constant[];
	const floatx80 *entry = &floatx80Constant[nIndex];

	return *entry;
}
#endif
static inline const float64 getDoubleConstant(const unsigned int nIndex)
{
......
......@@ -29,8 +29,8 @@ this code that are retained.
*/
#include "fpa11.h"
#include "milieu.h"
#include "softfloat.h"
//#include "milieu.h"
//#include "softfloat.h"
/*
-------------------------------------------------------------------------------
......@@ -142,12 +142,14 @@ INLINE int16 extractFloat32Exp( float32 a )
Returns the sign bit of the single-precision floating-point value `a'.
-------------------------------------------------------------------------------
*/
#if 0 /* in softfloat.h */
INLINE flag extractFloat32Sign( float32 a )
{
/* Shift right by 31 so that only the top (sign) bit of the
   single-precision encoding remains. */
return a>>31;
}
#endif
/*
-------------------------------------------------------------------------------
......@@ -321,12 +323,14 @@ INLINE int16 extractFloat64Exp( float64 a )
Returns the sign bit of the double-precision floating-point value `a'.
-------------------------------------------------------------------------------
*/
#if 0 /* in softfloat.h */
INLINE flag extractFloat64Sign( float64 a )
{
/* Shift right by 63 so that only the top (sign) bit of the
   double-precision encoding remains. */
return a>>63;
}
#endif
/*
-------------------------------------------------------------------------------
......
......@@ -40,7 +40,9 @@ floating-point format `floatx80'. If this macro is not defined, the
input or output the `floatx80' type will be defined.
-------------------------------------------------------------------------------
*/
#ifdef CONFIG_FPE_NWFPE_XP
#define FLOATX80
#endif
/*
-------------------------------------------------------------------------------
......@@ -229,4 +231,46 @@ char floatx80_is_signaling_nan( floatx80 );
#endif
/*
 * Returns the sign bit of the single-precision floating-point value `a',
 * obtained by shifting the raw encoding right by 31.
 */
static inline flag extractFloat32Sign(float32 a)
{
	const flag sign = a >> 31;

	return sign;
}
/*
 * Equality test on two single-precision encodings.  Returns 1 when the
 * values compare equal and 0 otherwise.  No NaN test is made here; the
 * callers are expected to have dealt with NaNs already.
 */
static inline flag float32_eq_nocheck(float32 a, float32 b)
{
	/* Identical bit patterns are always equal. */
	if (a == b)
		return 1;

	/*
	 * Otherwise the operands are equal only if every bit other than
	 * the sign is clear in both, i.e. one is +0 and the other is -0.
	 */
	return (bits32) ((a | b) << 1) == 0;
}
/*
 * Less-than test on two single-precision encodings.  Returns 1 when
 * `a' is strictly less than `b' and 0 otherwise.  No NaN test is made
 * here; the callers are expected to have dealt with NaNs already.
 */
static inline flag float32_lt_nocheck(float32 a, float32 b)
{
	const flag a_negative = extractFloat32Sign(a);
	const flag b_negative = extractFloat32Sign(b);

	if (a_negative == b_negative) {
		/* Same sign: equal encodings are never "less than". */
		if (a == b)
			return 0;
		/*
		 * Compare the raw encodings; for negative values the
		 * ordering of the encodings is reversed.
		 */
		return (a_negative ^ (a < b)) != 0;
	}

	/*
	 * Signs differ: `a' is smaller only when it is the negative one,
	 * and the pair is not just -0 versus +0.
	 */
	return a_negative && ((bits32) ((a | b) << 1) != 0);
}
/*
 * Returns the sign bit of the double-precision floating-point value `a',
 * obtained by shifting the raw encoding right by 63.
 */
static inline flag extractFloat64Sign(float64 a)
{
	const flag sign = a >> 63;

	return sign;
}
/*
 * Equality test on two double-precision encodings.  Returns 1 when the
 * values compare equal and 0 otherwise.  No NaN test is made here; the
 * callers are expected to have dealt with NaNs already.
 */
static inline flag float64_eq_nocheck(float64 a, float64 b)
{
	/* Identical bit patterns are always equal. */
	if (a == b)
		return 1;

	/*
	 * Otherwise the operands are equal only if every bit other than
	 * the sign is clear in both, i.e. one is +0 and the other is -0.
	 */
	return (bits64) ((a | b) << 1) == 0;
}
/*
 * Less-than test on two double-precision encodings.  Returns 1 when
 * `a' is strictly less than `b' and 0 otherwise.  No NaN test is made
 * here; the callers are expected to have dealt with NaNs already.
 */
static inline flag float64_lt_nocheck(float64 a, float64 b)
{
	const flag a_negative = extractFloat64Sign(a);
	const flag b_negative = extractFloat64Sign(b);

	if (a_negative == b_negative) {
		/* Same sign: equal encodings are never "less than". */
		if (a == b)
			return 0;
		/*
		 * Compare the raw encodings; for negative values the
		 * ordering of the encodings is reversed.
		 */
		return (a_negative ^ (a < b)) != 0;
	}

	/*
	 * Signs differ: `a' is smaller only when it is the negative one,
	 * and the pair is not just -0 versus +0.
	 */
	return a_negative && ((bits64) ((a | b) << 1) != 0);
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment