Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -481,13 +481,13 @@ // Test if the trunc is the user of a select which is part of a // minimum or maximum operation. If so, don't do any more simplification. - // Even simplifying demanded bits can break the canonical form of a + // Even simplifying demanded bits can break the canonical form of a // min/max. Value *LHS, *RHS; if (SelectInst *SI = dyn_cast(CI.getOperand(0))) if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) @@ -1133,7 +1133,7 @@ Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // If we know that the value being extended is positive, we can use a zext - // instead. + // instead. bool KnownZero, KnownOne; ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); if (KnownZero) { @@ -1421,6 +1421,9 @@ // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, this won't work with // i64 -> float -> i64. +// However, this is also safe if we can establish the range between most and +// least significant set bits of abs(X) fits into the mantissa, and that the +// number of trailing zeros fits into the exponent. Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) { if (!isa(FI.getOperand(0)) && !isa(FI.getOperand(0))) return nullptr; @@ -1446,7 +1449,58 @@ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned; int ActualSize = std::min(InputSize, OutputSize); - if (ActualSize <= OpITy->getFPMantissaWidth()) { + int MantissaWidth = OpITy->getFPMantissaWidth(); + + bool Safe; + + if (ActualSize <= MantissaWidth) { + Safe = true; + // No need to compute known bits. + } else { + // Now try and see if the integer value fits into the fp type. + int LeastSignificantPossiblySetBit = 0; + int MostSignificantPossiblySetBit = 0; + int HighestNotKnownZero = 0; + uint32_t BitWidth = SrcTy->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + computeKnownBits(SrcI, KnownZero, KnownOne, 0, &FI); + bool PossiblyNegative = IsInputSigned && !KnownZero[BitWidth - 1]; + bool KnownNegative = PossiblyNegative && KnownOne[BitWidth - 1]; + LeastSignificantPossiblySetBit = std::max( + int(KnownZero.countTrailingOnes()) - 1, + 0); + if (!KnownNegative) { + // Only necessary if it's not known negative (possibly positive). + HighestNotKnownZero = std::max(int(BitWidth - + KnownZero.countLeadingOnes()) - 1, + 0); + } + if (!PossiblyNegative) { + MostSignificantPossiblySetBit = HighestNotKnownZero; + } else { + if (KnownOne.countTrailingZeros() == BitWidth - 1) { + MostSignificantPossiblySetBit = BitWidth - 1; + } else { + int LowestNotKnownOne = std::max(int(KnownOne.countTrailingOnes()) + - 1, + 0); + if (LowestNotKnownOne != int(BitWidth - 2)) + ++LowestNotKnownOne; + if (KnownNegative) { + MostSignificantPossiblySetBit = LowestNotKnownOne; + } else { + MostSignificantPossiblySetBit = std::max(HighestNotKnownZero, + LowestNotKnownOne); + } + } + } + int BitRange = MostSignificantPossiblySetBit - + LeastSignificantPossiblySetBit + 1; + Safe = BitRange <= MantissaWidth && MostSignificantPossiblySetBit <= + (int)OpITy->getScalarSizeInBits(); + } + + if (Safe) { if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) { if (IsInputSigned && IsOutputSigned) return new SExtInst(SrcI, FITy); Index: test/Transforms/InstCombine/sitofp.ll =================================================================== --- test/Transforms/InstCombine/sitofp.ll +++ test/Transforms/InstCombine/sitofp.ll @@ -182,3 +182,103 @@ ret i55 %C } +; This should fold because even though the bit width of the integer +; is greater than that of the mantissa, we can establish that the +; actual value of the integer will fit into the mantissa. +; (Bits of %B: 00000000UUUUUUUUUUUUUUUUUUUUUUUU, U = unknown) +; CHECK-LABEL: test20 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test20(i32 %A) nounwind { + %B = and i32 %A, 16777215 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +} + +; Same as last test, but unsigned. +; CHECK-LABEL: test21 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test21(i32 %A) nounwind { + %B = and i32 %A, 16777215 + %C = uitofp i32 %B to float + %D = fptoui float %C to i32 + ret i32 %D +} + +; This should fold, because even though the effective exponent is +; 2**24, which does not fit into float's exponent, we can just +; shift the mantissa 1 bit to the left to compensate. +; (Bits of %B: 0000000U000000000000000000000000, U = unknown) +; CHECK-LABEL: test22 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test22(i32 %A) nounwind { + %B = and i32 %A, 16777216 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +} + +; Same as last test, but unsigned. +; CHECK-LABEL: test23 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test23(i32 %A) nounwind { + %B = and i32 %A, 16777216 + %C = uitofp i32 %B to float + %D = fptoui float %C to i32 + ret i32 %D +} + +; This should not fold, because the integer's value does not fit +; into the mantissa. +; (Bits of %B: 0000000UUUUUUUUUUUUUUUUUUUUUUUUU, U = unknown) +; CHECK-LABEL: test24 +; CHECK: sitofp +; CHECK-NEXT: fptosi +define i32 @test24(i32 %A) nounwind { + %B = and i32 %A, 33554431 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +} + +; Same as last test, but unsigned. +; CHECK-LABEL: test25 +; CHECK: uitofp +; CHECK-NEXT: fptoui +define i32 @test25(i32 %A) nounwind { + %B = and i32 %A, 33554431 + %C = uitofp i32 %B to float + %D = fptoui float %C to i32 + ret i32 %D +} + +; This should fold because due to the sign being set, the highest +; possibly set bit of the absolute value is the bit to the left of +; the highest not known one bit (in this case, position 0 + 1 = 1). +; (Bits of %B: 1111111111111111111111111111111U, U = unknown) +; CHECK-LABEL: test26 +; CHECK: or +; CHECK-NEXT: ret i32 +define i32 @test26(i32 %A) nounwind { + %B = or i32 %A, -2 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +} + +; This should not fold because of the same property. +; (Bits of %B: 10000000UUUUUUUUUUUUUUUUUUUUUUUU, U = unknown) +; CHECK-LABEL: test27 +; CHECK: sitofp +; CHECK-NEXT: fptosi +define i32 @test27(i32 %A) nounwind { + %B = and i32 %A, 16777215 + %C = or i32 -2147483648, %B + %D = sitofp i32 %C to float + %E = fptosi float %D to i32 + ret i32 %E +}