Commit 2cfa5825 authored by Douglas Leung, committed by Ralf Baechle

MIPS: math-emu: <MADDF|MSUBF>.D: Fix accuracy (64-bit case)

Implement fused multiply-add with correct accuracy.

A fused multiply-add operation is more accurate than the sequential
execution of separate multiply and add operations on the same inputs,
because the latter rounds the intermediate product before the
addition, so rounding errors accumulate.
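
As an illustration only (host-side C, not part of this patch), the
following toy example shows the effect: the two-step sequence rounds
the intermediate product away, while the single rounding of fma()
keeps the exact result. The input values are arbitrary:

  #include <math.h>
  #include <stdio.h>

  int main(void)
  {
          /* a * b == 1 - 2^-60 exactly, which rounds to 1.0 as a double */
          double a = 1.0 + 0x1p-30;
          double b = 1.0 - 0x1p-30;
          double c = -1.0;

          printf("sequential: %a\n", a * b + c);    /* 0x0p+0   */
          printf("fused     : %a\n", fma(a, b, c)); /* -0x1p-60 */
          return 0;
  }

(Compile with -lm; fma() rounds the whole operation once, which is the
behaviour MADDF.D is expected to have.)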

This patch implements fused multiply-add with the same accuracy as the
hardware implementation, using 128-bit intermediate calculations.
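
The idea, sketched in plain C below (this is not the kernel code
itself, which keeps the mantissa product in the hrm:lrm pair), is to
form the full 64 x 64 -> 128 bit product from 32-bit partial products
so that no bits are discarded before the final rounding step; the
helper name mul64to128 is made up for this sketch:

  #include <stdint.h>

  /* Full 128-bit product of two 64-bit operands, returned as hi:lo. */
  static void mul64to128(uint64_t x, uint64_t y,
                         uint64_t *hi, uint64_t *lo)
  {
          uint64_t xl = (uint32_t)x, xh = x >> 32;
          uint64_t yl = (uint32_t)y, yh = y >> 32;

          uint64_t ll = xl * yl;          /* 32 x 32 -> 64 partial products */
          uint64_t lh = xl * yh;
          uint64_t hl = xh * yl;
          uint64_t hh = xh * yh;

          uint64_t mid = lh + (ll >> 32); /* cannot overflow 64 bits */
          mid += hl;                      /* this addition can carry out */
          uint64_t carry = (mid < hl) ? (1ULL << 32) : 0;

          *lo = (mid << 32) | (uint32_t)ll;
          *hi = hh + (mid >> 32) + carry;
  }

The patch then aligns, adds or subtracts the 128-bit intermediates and
only at the end shifts them down, with a sticky bit via srl128(), to
the 56-bit mantissa that ieee754dp_format() expects.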

One example of a test case (raw bits) that this patch fixes:

MADDF.D fd,fs,ft:
  fd = 0x00000ca000000000
  fs = ft = 0x3f40624dd2f1a9fc
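
For reference, the case can be checked on a host that provides a
correctly rounded fma() by reinterpreting the raw bits above (a
hypothetical user-space harness, not part of the patch); the emulator
is expected to match the fused result:

  #include <math.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  static double from_bits(uint64_t bits)
  {
          double d;

          memcpy(&d, &bits, sizeof(d));
          return d;
  }

  int main(void)
  {
          double fd = from_bits(0x00000ca000000000ULL);
          double fs = from_bits(0x3f40624dd2f1a9fcULL);
          double ft = fs;

          /* MADDF.D fd,fs,ft computes fd = fd + fs * ft, rounded once */
          printf("fused    : %a\n", fma(fs, ft, fd));
          printf("two-step : %a\n", fs * ft + fd);
          return 0;
  }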

Fixes: e24c3bec ("MIPS: math-emu: Add support for the MIPS R6 MADDF FPU instruction")
Fixes: 83d43305 ("MIPS: math-emu: Add support for the MIPS R6 MSUBF FPU instruction")
Signed-off-by: Douglas Leung <douglas.leung@imgtec.com>
Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: Douglas Leung <douglas.leung@imgtec.com>
Cc: Bo Hu <bohu@google.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jin Qian <jinqian@google.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: <stable@vger.kernel.org> # 4.7+
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16891/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent b3b8e1eb
@@ -15,18 +15,44 @@
 
 #include "ieee754dp.h"
 
+/* 128 bits shift right logical with rounding. */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+	u64 low;
+
+	if (count >= 128) {
+		*lptr = *hptr != 0 || *lptr != 0;
+		*hptr = 0;
+	} else if (count >= 64) {
+		if (count == 64) {
+			*lptr = *hptr | (*lptr != 0);
+		} else {
+			low = *lptr;
+			*lptr = *hptr >> (count - 64);
+			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+		}
+		*hptr = 0;
+	} else {
+		low = *lptr;
+		*lptr = low >> count | *hptr << (64 - count);
+		*lptr |= (low << (64 - count)) != 0;
+		*hptr = *hptr >> count;
+	}
+}
+
 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 				union ieee754dp y, enum maddf_flags flags)
 {
 	int re;
 	int rs;
-	u64 rm;
 	unsigned lxm;
 	unsigned hxm;
 	unsigned lym;
 	unsigned hym;
 	u64 lrm;
 	u64 hrm;
+	u64 lzm;
+	u64 hzm;
 	u64 t;
 	u64 at;
 	int s;
@@ -172,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	ym <<= 64 - (DP_FBITS + 1);
 
 	/*
-	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+	 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
 	 */
 
 	/* 32 * 32 => 64 */
@@ -202,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 
 	hrm = hrm + (t >> 32);
 
-	rm = hrm | (lrm != 0);
-
-	/*
-	 * Sticky shift down to normal rounding precision.
-	 */
-	if ((s64) rm < 0) {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
-		     ((rm << (DP_FBITS + 1 + 3)) != 0);
+	/* Put explicit bit at bit 126 if necessary */
+	if ((int64_t)hrm < 0) {
+		lrm = (hrm << 63) | (lrm >> 1);
+		hrm = hrm >> 1;
 		re++;
-	} else {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
-			((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
 	}
-	assert(rm & (DP_HIDDEN_BIT << 3));
 
-	if (zc == IEEE754_CLASS_ZERO)
-		return ieee754dp_format(rs, re, rm);
-
-	/* And now the addition */
-	assert(zm & DP_HIDDEN_BIT);
+	assert(hrm & (1 << 62));
 
-	/*
-	 * Provide guard,round and stick bit space.
-	 */
-	zm <<= 3;
+	if (zc == IEEE754_CLASS_ZERO) {
+		/*
+		 * Move explicit bit from bit 126 to bit 55 since the
+		 * ieee754dp_format code expects the mantissa to be
+		 * 56 bits wide (53 + 3 rounding bits).
+		 */
+		srl128(&hrm, &lrm, (126 - 55));
+		return ieee754dp_format(rs, re, lrm);
+	}
+
+	/* Move explicit bit from bit 52 to bit 126 */
+	lzm = 0;
+	hzm = zm << 10;
+	assert(hzm & (1 << 62));
 
+	/* Make the exponents the same */
 	if (ze > re) {
 		/*
 		 * Have to shift y fraction right to align.
 		 */
 		s = ze - re;
-		rm = XDPSRS(rm, s);
+		srl128(&hrm, &lrm, s);
 		re += s;
 	} else if (re > ze) {
 		/*
 		 * Have to shift x fraction right to align.
 		 */
 		s = re - ze;
-		zm = XDPSRS(zm, s);
+		srl128(&hzm, &lzm, s);
 		ze += s;
 	}
 	assert(ze == re);
 	assert(ze <= DP_EMAX);
 
+	/* Do the addition */
 	if (zs == rs) {
 		/*
-		 * Generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in xm, xs and xe.
+		 * Generate 128 bit result by adding two 127 bit numbers
+		 * leaving result in hzm:lzm, zs and ze.
 		 */
-		zm = zm + rm;
-
-		if (zm >> (DP_FBITS + 1 + 3)) {	/* carry out */
-			zm = XDPSRS1(zm);
+		hzm = hzm + hrm + (lzm > (lzm + lrm));
+		lzm = lzm + lrm;
+		if ((int64_t)hzm < 0) {	/* carry out */
+			srl128(&hzm, &lzm, 1);
 			ze++;
 		}
 	} else {
-		if (zm >= rm) {
-			zm = zm - rm;
+		if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+			hzm = hzm - hrm - (lzm < lrm);
+			lzm = lzm - lrm;
 		} else {
-			zm = rm - zm;
+			hzm = hrm - hzm - (lrm < lzm);
+			lzm = lrm - lzm;
 			zs = rs;
 		}
-		if (zm == 0)
+		if (lzm == 0 && hzm == 0)
 			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 		/*
-		 * Normalize to rounding precision.
+		 * Put explicit bit at bit 126 if necessary.
 		 */
-		while ((zm >> (DP_FBITS + 3)) == 0) {
-			zm <<= 1;
-			ze--;
+		if (hzm == 0) {
+			/* left shift by 63 or 64 bits */
+			if ((int64_t)lzm < 0) {
+				/* MSB of lzm is the explicit bit */
+				hzm = lzm >> 1;
+				lzm = lzm << 63;
+				ze -= 63;
+			} else {
+				hzm = lzm;
+				lzm = 0;
+				ze -= 64;
+			}
+		}
+
+		t = 0;
+		while ((hzm >> (62 - t)) == 0)
+			t++;
+		assert(t <= 62);
+		if (t) {
+			hzm = hzm << t | lzm >> (64 - t);
+			lzm = lzm << t;
+			ze -= t;
 		}
 	}
 
-	return ieee754dp_format(zs, ze, zm);
+	/*
+	 * Move explicit bit from bit 126 to bit 55 since the
+	 * ieee754dp_format code expects the mantissa to be
+	 * 56 bits wide (53 + 3 rounding bits).
	 */
+	srl128(&hzm, &lzm, (126 - 55));
+
+	return ieee754dp_format(zs, ze, lzm);
 }
 
 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
...