diff --git a/compiler-rt/lib/builtins/fp_div_impl.inc b/compiler-rt/lib/builtins/fp_div_impl.inc --- a/compiler-rt/lib/builtins/fp_div_impl.inc +++ b/compiler-rt/lib/builtins/fp_div_impl.inc @@ -347,12 +347,17 @@ // effectively doubling its value as well as its error estimation. residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand; writtenExponent -= 1; + aSignificand <<= 1; } else { // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it // to UQ1.SB by right shifting by 1. Least significant bit is omitted. quotient_UQ1 >>= 1; residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand; } + // NB: residualLo is calculated above for the normal result case. + // It is recomputed on the denormal path, which is expected to be less + // performance-sensitive. + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB // Each NextAfter() increments the floating point value by at least 2^-SB // (more, if exponent was incremented). 
@@ -380,19 +385,25 @@ // Now, quotient_UQ1_SB <= the correctly-rounded result // and may need taking NextAfter() up to 3 times (see error estimates above) // r = a - b * q + rep_t absResult; + if (writtenExponent > 0) { + // Clear the implicit bit + absResult = quotient_UQ1 & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + residualLo <<= 1; + } else { + // Return signed zero rather than let the shift amount below go negative + if (significandBits + writtenExponent < 0) + return fromRep(quotientSign); - if (writtenExponent < 0) { - // Result is definitely subnormal, flushing to zero - return fromRep(quotientSign); - } + absResult = quotient_UQ1 >> (-writtenExponent + 1); - // Clear the implicit bit - rep_t absResult = quotient_UQ1 & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; + // Both terms are multiplied by two to prevent the shift amount from being negative + residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1); + } // Round - residualLo <<= 1; residualLo += absResult & 1; // tie to even // The above line conditionally turns the below LT comparison into LTE absResult += residualLo > bSignificand; @@ -403,11 +414,5 @@ #if defined(QUAD_PRECISION) absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand; #endif - - if ((absResult & ~significandMask) == 0) { - // Result is subnormal, flushing to zero - return fromRep(quotientSign); - } - // Result is normal, insert the sign and return return fromRep(absResult | quotientSign); } diff --git a/compiler-rt/test/builtins/Unit/divdf3_test.c b/compiler-rt/test/builtins/Unit/divdf3_test.c --- a/compiler-rt/test/builtins/Unit/divdf3_test.c +++ b/compiler-rt/test/builtins/Unit/divdf3_test.c @@ -92,6 +92,13 @@ if (test__divdf3(0x1.0p+0, 0x1.00000001p+0, UINT64_C(0x3fefffffffe00000))) return 1; + // smallest normal value divided by 2.0 + if (test__divdf3(0x1.0p-1022, 2., UINT64_C(0x0008000000000000))) + return 
1; + // smallest subnormal result: smallest normal value divided by 2^52 + if (test__divdf3(0x1.0p-1022, 0x1.0p+52, UINT64_C(0x0000000000000001))) + return 1; + // some misc test cases obtained by fuzzing against h/w implementation if (test__divdf3(0x1.fdc239dd64735p-658, -0x1.fff9364c0843fp-948, UINT64_C(0xd20fdc8fc0ceffb1))) return 1; @@ -99,6 +106,12 @@ return 1; if (test__divdf3(-0x1.da7dfe6048b8bp-875, 0x1.ffc7ea3ff60a4p-610, UINT64_C(0xaf5dab1fe0269e2a))) return 1; + if (test__divdf3(0x1.0p-1022, 0x1.9p+5, UINT64_C(0x000051eb851eb852))) + return 1; + if (test__divdf3(0x1.0p-1022, 0x1.0028p+41, UINT64_C(0x00000000000007ff))) + return 1; + if (test__divdf3(0x1.0p-1022, 0x1.0028p+52, UINT64_C(0x1))) + return 1; return 0; } diff --git a/compiler-rt/test/builtins/Unit/divsf3_test.c b/compiler-rt/test/builtins/Unit/divsf3_test.c --- a/compiler-rt/test/builtins/Unit/divsf3_test.c +++ b/compiler-rt/test/builtins/Unit/divsf3_test.c @@ -92,5 +92,20 @@ if (test__divsf3(0x1.0p+0F, 0x1.0001p+0F, UINT32_C(0x3f7fff00))) return 1; + // smallest normal value divided by 2.0 + if (test__divsf3(0x1.0p-126F, 2.0F, UINT32_C(0x00400000))) + return 1; + // smallest subnormal result: smallest normal value divided by 2^23 + if (test__divsf3(0x1.0p-126F, 0x1p+23F, UINT32_C(0x00000001))) + return 1; + + // some misc test cases obtained by fuzzing against h/w implementation + if (test__divsf3(-0x1.3e75e6p-108F, -0x1.cf372p+38F, UINT32_C(0x00000006))) + return 1; + if (test__divsf3(0x1.e77c54p+81F, -0x1.e77c52p-47F, UINT32_C(0xff800000))) + return 1; + if (test__divsf3(0x1.fffffep-126F, 2.F, UINT32_C(0x00800000))) + return 1; + return 0; } diff --git a/compiler-rt/test/builtins/Unit/divtf3_test.c b/compiler-rt/test/builtins/Unit/divtf3_test.c --- a/compiler-rt/test/builtins/Unit/divtf3_test.c +++ b/compiler-rt/test/builtins/Unit/divtf3_test.c @@ -146,6 +146,13 @@ UINT64_C(0xfffe000000000000))) return 1; + // smallest normal value divided by 2.0 + if (test__divtf3(0x1.0p-16382L, 2.L, UINT64_C(0x0000800000000000), UINT64_C(0x0))) + return 1; + // smallest subnormal 
result: smallest normal value divided by 2^112 + if (test__divtf3(0x1.0p-16382L, 0x1p+112L, UINT64_C(0x0), UINT64_C(0x1))) + return 1; + // any / any if (test__divtf3(0x1.a23b45362464523375893ab4cdefp+5L, 0x1.eedcbaba3a94546558237654321fp-1L,