[InstCombine] Fold fpto{s,u}i({s,u}itofp(X)) when known bits prove X is exact

FoldItoFPtoI folds the int -> fp -> int round trip only when the integer type
is no wider than the mantissa of the intermediate fp type.  This patch also
folds when computeKnownBits shows that the span between the lowest and the
highest possibly-set bit of X fits into the mantissa and the magnitude stays
below a conservative bound derived from the fp type's total bit width.

Review notes on this revision of the patch:
  * The least/most significant possibly-set bit positions are derived from
    countTrailingOnes() and BitWidth - countLeadingOnes() of KnownZero, and
    the span is compared against the mantissa width with <=, so a value with
    exactly MantissaWidth significant bits (test20) folds while wider values
    do not.
  * The first two hunks only strip trailing whitespace; they are unrelated to
    the functional change.
  * Whitespace in this copy of the patch was reconstructed.  If context lines
    do not match the tree byte-for-byte, apply with `patch -l`.

Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -481,13 +481,13 @@
 
   // Test if the trunc is the user of a select which is part of a
   // minimum or maximum operation. If so, don't do any more simplification.
-  // Even simplifying demanded bits can break the canonical form of a 
+  // Even simplifying demanded bits can break the canonical form of a
   // min/max.
   Value *LHS, *RHS;
   if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
     if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
       return nullptr;
-  
+
   // See if we can simplify any instructions used by the input whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(CI))
@@ -1133,7 +1133,7 @@
   Type *SrcTy = Src->getType(), *DestTy = CI.getType();
 
   // If we know that the value being extended is positive, we can use a zext
-  // instead. 
+  // instead.
   bool KnownZero, KnownOne;
   ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
   if (KnownZero) {
@@ -1418,6 +1418,10 @@
 // This is safe if the intermediate type has enough bits in its mantissa to
 // accurately represent all values of X.  For example, this won't work with
 // i64 -> float -> i64.
+// However, this is also safe if we can establish the range between most and
+// least significant set bits of abs(X) fits into the mantissa, and that the
+// number of trailing zeros fits into the exponent.
+
 Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
   if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
     return nullptr;
@@ -1443,7 +1447,35 @@
   int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
   int ActualSize = std::min(InputSize, OutputSize);
 
-  if (ActualSize <= OpITy->getFPMantissaWidth()) {
+  int MantissaWidth = OpITy->getFPMantissaWidth();
+
+  bool Safe;
+
+  if (ActualSize <= MantissaWidth) {
+    Safe = true;
+    // No need to compute known bits.
+  } else {
+    // Now try and see if the integer value fits into the fp type.
+    // Calculate the number of bits between the least and most significant
+    // possibly set (not known zero) bits.
+    uint32_t BitWidth = SrcTy->getScalarSizeInBits();
+    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+    computeKnownBits(SrcI, KnownZero, KnownOne, 0, &FI);
+    bool PossiblyNegative = IsInputSigned && !KnownZero[BitWidth - 1];
+    int LeastSignificantPossiblySetBit = KnownZero.countTrailingOnes();
+    int MostSignificantPossiblySetBit = (int)BitWidth - PossiblyNegative -
+                                        (int)KnownZero.countLeadingOnes();
+    int BitRange = MostSignificantPossiblySetBit -
+                   LeastSignificantPossiblySetBit;
+    // If there are more than this many trailing zeros, the exponent is too
+    // high and cannot be compensated for by shifting the mantissa to the left
+    int MostPossibleTrailingZeros = (int)OpITy->getScalarSizeInBits() - 1 -
+                                    BitRange;
+    Safe = BitRange <= MantissaWidth && LeastSignificantPossiblySetBit <
+                                            MostPossibleTrailingZeros;
+  }
+
+  if (Safe) {
     if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
       if (IsInputSigned && IsOutputSigned)
         return new SExtInst(SrcI, FITy);
Index: test/Transforms/InstCombine/sitofp.ll
===================================================================
--- test/Transforms/InstCombine/sitofp.ll
+++ test/Transforms/InstCombine/sitofp.ll
@@ -182,3 +182,41 @@
   ret i55 %C
 }
 
+; This should fold because even though the bit width of the integer
+; is greater than that of the mantissa, we can establish that the
+; actual value of the integer will fit into the mantissa.
+; CHECK-LABEL: test20
+; CHECK: and
+; CHECK-NEXT: ret i32
+define i32 @test20(i32 %A) nounwind {
+  %B = and i32 %A, 16777215
+  %C = sitofp i32 %B to float
+  %D = fptosi float %C to i32
+  ret i32 %D
+}
+
+; This should fold: the masked value is either 0 or 2**24. Although 2**24
+; needs 25 bits as an integer, it has a single significant bit, so it is
+; exactly representable in float.
+; CHECK-LABEL: test21
+; CHECK: and
+; CHECK-NEXT: ret i32
+define i32 @test21(i32 %A) nounwind {
+  %B = and i32 %A, 16777216
+  %C = sitofp i32 %B to float
+  %D = fptosi float %C to i32
+  ret i32 %D
+}
+
+; This is not folded: the 24 possibly-set bits (9..32) would fit into the
+; mantissa, but the value is too large for the conservative magnitude check.
+; CHECK-LABEL: test22
+; CHECK: and
+; CHECK-NEXT: sitofp
+; CHECK-NEXT: fptosi
+define i64 @test22(i64 %A) nounwind {
+  %B = and i64 %A, 8589934080
+  %C = sitofp i64 %B to float
+  %D = fptosi float %C to i64
+  ret i64 %D
+}