diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -2213,8 +2213,11 @@ // when truncating from PowerPC double-double to double format), the // right shift could lose result mantissa bits. Adjust exponent instead // of performing excessive shift. + // Likewise, if the shift would discard every set bit of a denormal, move + // part of it into the exponent; normalize mishandles a zero significand. if (shift < 0 && isFiniteNonZero()) { - int exponentChange = significandMSB() + 1 - fromSemantics.precision; + int omsb = significandMSB() + 1; + int exponentChange = omsb - fromSemantics.precision; if (exponent + exponentChange < toSemantics.minExponent) exponentChange = toSemantics.minExponent - exponent; if (exponentChange < shift) @@ -2222,6 +2225,10 @@ if (exponentChange < 0) { shift -= exponentChange; exponent += exponentChange; + } else if (omsb <= -shift) { + exponentChange = omsb + shift - 1; // leave at least one bit set + shift -= exponentChange; + exponent += exponentChange; } } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -79,21 +79,17 @@ ret float %b } -; FIXME: This should be 0.0. - define float @trunc_denorm_lost_fraction1() { ; CHECK-LABEL: @trunc_denorm_lost_fraction1( -; CHECK-NEXT: ret float 0x36A0000000000000 +; CHECK-NEXT: ret float 0.000000e+00 ; %b = fptrunc double 0x0000000010000001 to float ret float %b } -; FIXME: This should be 0.0. - define float @trunc_denorm_lost_fraction2() { ; CHECK-LABEL: @trunc_denorm_lost_fraction2( -; CHECK-NEXT: ret float 0x36A0000000000000 +; CHECK-NEXT: ret float 0.000000e+00 ; %b = fptrunc double 0x000000001fffffff to float ret float %b @@ -107,11 +103,9 @@ ret float %b } -; FIXME: This should be -0.0. 
- define float @trunc_denorm_lost_fraction4() { ; CHECK-LABEL: @trunc_denorm_lost_fraction4( -; CHECK-NEXT: ret float 0xB6A0000000000000 +; CHECK-NEXT: ret float -0.000000e+00 ; %b = fptrunc double 0x8000000010000001 to float ret float %b diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -1859,6 +1859,48 @@ EXPECT_EQ(0x7fc00000, test.bitcastToAPInt()); EXPECT_TRUE(losesInfo); EXPECT_EQ(status, APFloat::opOK); + + // Double subnormals lie far below float's range, so each rounds to a zero + test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022"); + test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + + test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022"); + test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + + test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022"); + test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(test.isNegZero()); + EXPECT_TRUE(losesInfo); + + test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022"); + test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + + test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022"); + test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + + // Test subnormal conversion to bfloat + test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126"); + test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + + test = APFloat(APFloat::IEEEsingle(), 
"0x0.02p-126"); + test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(0x01, test.bitcastToAPInt()); + EXPECT_FALSE(losesInfo); + + test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126"); + test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo); + EXPECT_EQ(0x01, test.bitcastToAPInt()); + EXPECT_TRUE(losesInfo); } TEST(APFloatTest, PPCDoubleDouble) {