diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -1977,14 +1977,59 @@ return fs; } -/* Rounding-mode corrrect round to integral value. */ +/* Rounding-mode correct round to integral value. */ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { opStatus fs; + if (isInfinity()) + // [IEEE Std 754-2008 6.1]: + // The behavior of infinity in floating-point arithmetic is derived from the + // limiting cases of real arithmetic with operands of arbitrarily + // large magnitude, when such a limit exists. + // ... + // Operations on infinite operands are usually exact and therefore signal no + // exceptions ... + return opOK; + + if (isNaN()) { + if (isSignaling()) { + // [IEEE Std 754-2008 6.2]: + // Under default exception handling, any operation signaling an invalid + // operation exception and for which a floating-point result is to be + // delivered shall deliver a quiet NaN. + makeQuiet(); + // [IEEE Std 754-2008 6.2]: + // Signaling NaNs shall be reserved operands that, under default exception + // handling, signal the invalid operation exception (see 7.2) for every + // general-computational and signaling-computational operation except for + // the conversions described in 5.12. + return opInvalidOp; + } else { + // [IEEE Std 754-2008 6.2]: + // For an operation with quiet NaN inputs, other than maximum and minimum + // operations, if a floating-point result is to be delivered the result + // shall be a quiet NaN which should be one of the input NaNs. + // ... + // Every general-computational and quiet-computational operation involving + // one or more input NaNs, none of them signaling, shall signal no + // exception, except fusedMultiplyAdd might signal the invalid operation + // exception (see 7.2). + return opOK; + } + } + + if (isZero()) { + // [IEEE Std 754-2008 6.3]: + // ... 
the sign of the result of conversions, the quantize operation, the + // roundToIntegral operations, and the roundToIntegralExact (see 5.3.1) is + // the sign of the first or only operand. + return opOK; + } + // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) + if (exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the @@ -1998,19 +2043,18 @@ IEEEFloat MagicConstant(*semantics); fs = MagicConstant.convertFromAPInt(IntegerConstant, false, rmNearestTiesToEven); + assert(fs == opOK); MagicConstant.sign = sign; - if (fs != opOK) - return fs; - - // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + // Preserve the input sign so that we can handle the case of zero result + // correctly. bool inputSign = isNegative(); fs = add(MagicConstant, rounding_mode); - if (fs != opOK && fs != opInexact) - return fs; - fs = subtract(MagicConstant, rounding_mode); + // Current value and 'MagicConstant' are both integers, so the result of the + // subtraction is always exact according to Sterbenz' lemma. + subtract(MagicConstant, rounding_mode); // Restore the input sign. 
if (inputSign != isNegative()) diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -1525,6 +1525,124 @@ P = APFloat::getInf(APFloat::IEEEdouble(), true); P.roundToIntegral(APFloat::rmTowardZero); EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0); + + APFloat::opStatus St; + + P = APFloat::getNaN(APFloat::IEEEdouble()); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isNaN()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getNaN(APFloat::IEEEdouble(), true); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isNaN()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getSNaN(APFloat::IEEEdouble()); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isNaN()); + EXPECT_FALSE(P.isSignaling()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opInvalidOp, St); + + P = APFloat::getSNaN(APFloat::IEEEdouble(), true); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isNaN()); + EXPECT_FALSE(P.isSignaling()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opInvalidOp, St); + + P = APFloat::getInf(APFloat::IEEEdouble()); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isInfinity()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getInf(APFloat::IEEEdouble(), true); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isInfinity()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getZero(APFloat::IEEEdouble(), false); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isZero()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getZero(APFloat::IEEEdouble(), false); + St = P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_TRUE(P.isZero()); + 
EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getZero(APFloat::IEEEdouble(), true); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_TRUE(P.isZero()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat::getZero(APFloat::IEEEdouble(), true); + St = P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_TRUE(P.isZero()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat(1E-100); + St = P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_TRUE(P.isZero()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(1E-100); + St = P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_EQ(1.0, P.convertToDouble()); + EXPECT_FALSE(P.isNegative()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(-1E-100); + St = P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(-1.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(-1E-100); + St = P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_TRUE(P.isZero()); + EXPECT_TRUE(P.isNegative()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(10.0); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(10.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opOK, St); + + P = APFloat(10.5); + St = P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(10.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(10.5); + St = P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_EQ(11.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(10.5); + St = P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_EQ(10.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(10.5); + St = P.roundToIntegral(APFloat::rmNearestTiesToAway); + EXPECT_EQ(11.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); + + P = APFloat(10.5); + St = 
P.roundToIntegral(APFloat::rmNearestTiesToEven); + EXPECT_EQ(10.0, P.convertToDouble()); + EXPECT_EQ(APFloat::opInexact, St); } TEST(APFloatTest, isInteger) {