Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -481,13 +481,13 @@ // Test if the trunc is the user of a select which is part of a // minimum or maximum operation. If so, don't do any more simplification. - // Even simplifying demanded bits can break the canonical form of a + // Even simplifying demanded bits can break the canonical form of a // min/max. Value *LHS, *RHS; if (SelectInst *SI = dyn_cast(CI.getOperand(0))) if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) @@ -1133,7 +1133,7 @@ Type *SrcTy = Src->getType(), *DestTy = CI.getType(); // If we know that the value being extended is positive, we can use a zext - // instead. + // instead. bool KnownZero, KnownOne; ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); if (KnownZero) { @@ -1418,6 +1418,10 @@ // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, this won't work with // i64 -> float -> i64. +// However, this is also safe if we can establish the range between most and +// least significant set bits of abs(X) fits into the mantissa, and that the +// number of trailing zeros fits into the exponent. 
+ Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) { if (!isa(FI.getOperand(0)) && !isa(FI.getOperand(0))) return nullptr; @@ -1443,7 +1447,42 @@ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned; int ActualSize = std::min(InputSize, OutputSize); - if (ActualSize <= OpITy->getFPMantissaWidth()) { + int MantissaWidth = OpITy->getFPMantissaWidth(); + + bool safe; + + if (ActualSize <= MantissaWidth) { + safe = true; + // No need to compute known bits. + } else { + // Now try and see if the integer value fits into the fp type. + int LeastSignificantPossiblySetBit = 0; + int MostSignificantPossiblySetBit = 0; + int BitRange = 0; + uint32_t BitWidth = SrcTy->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + computeKnownBits(SrcI, KnownZero, KnownOne, 0, &FI); + for (unsigned i = 0; i < BitWidth - 1; i++) { + if (!KnownZero[i]) { + LeastSignificantPossiblySetBit = i; + break; + } + } + for (unsigned i = 0, j; i < BitWidth - 1; i++) { + j = BitWidth - i - 1; + if (!KnownZero[j]) { + bool offsetOne = (IsInputSigned || !KnownZero[BitWidth - 1]) && + (!KnownOne[BitWidth - 1] || KnownZero.countTrailingOnes() < j); + MostSignificantPossiblySetBit = j - offsetOne; + break; + } + } + BitRange = MostSignificantPossiblySetBit - LeastSignificantPossiblySetBit + 1; + safe = BitRange < MantissaWidth && LeastSignificantPossiblySetBit < + ((int)OpITy->getScalarSizeInBits() - 1 - BitRange); + } + + if (safe) { if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) { if (IsInputSigned && IsOutputSigned) return new SExtInst(SrcI, FITy); Index: test/Transforms/InstCombine/sitofp.ll =================================================================== --- test/Transforms/InstCombine/sitofp.ll +++ test/Transforms/InstCombine/sitofp.ll @@ -182,3 +182,28 @@ ret i55 %C } +; This should fold because even though the bit width of the integer +; is greater than that of the mantissa, we can establish that the +; actual value of the 
integer will fit into the mantissa. +; CHECK-LABEL: test20 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test20(i32 %A) nounwind { + %B = and i32 %A, 16777215 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +} + +; This should fold: the only possibly-set bit is bit 24 (the value 2**24), +; which lies just above float's 24-bit mantissa, but a single set bit is +; still exactly representable by adjusting the exponent. +; CHECK-LABEL: test21 +; CHECK: and +; CHECK-NEXT: ret i32 +define i32 @test21(i32 %A) nounwind { + %B = and i32 %A, 16777216 + %C = sitofp i32 %B to float + %D = fptosi float %C to i32 + ret i32 %D +}