Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1720,7 +1720,7 @@ /// Return true if the cast from integer to FP can be proven to be exact for all /// possible inputs (the conversion does not lose any precision). -static bool isKnownExactCastIntToFP(CastInst &I) { +static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) { CastInst::CastOps Opcode = I.getOpcode(); assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) && "Unexpected cast"); @@ -1754,9 +1754,14 @@ return true; } - // TODO: // Try harder to find if the source integer type has less significant bits. // For example, compute number of sign bits or compute low bit mask. + KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I); + int LowBits = + (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros(); + if (LowBits <= DestNumSigBits) + return true; + return false; } @@ -1937,7 +1942,7 @@ Value *Src = FPT.getOperand(0); if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) { auto *FPCast = cast<CastInst>(Src); - if (isKnownExactCastIntToFP(*FPCast)) + if (isKnownExactCastIntToFP(*FPCast, *this)) return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty); } @@ -1951,7 +1956,7 @@ Value *Src = FPExt.getOperand(0); if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) { auto *FPCast = cast<CastInst>(Src); - if (isKnownExactCastIntToFP(*FPCast)) + if (isKnownExactCastIntToFP(*FPCast, *this)) return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty); } @@ -1978,7 +1983,7 @@ // This means this is also safe for a signed input and unsigned output, since // a negative input would lead to undefined behavior. 
- if (!isKnownExactCastIntToFP(*OpI)) { + if (!isKnownExactCastIntToFP(*OpI, *this)) { // The first cast may not round exactly based on the source integer width // and FP width, but the overflow UB rules can still allow this to fold. // If the destination type is narrow, that means the intermediate FP value Index: llvm/test/Transforms/InstCombine/fptrunc.ll =================================================================== --- llvm/test/Transforms/InstCombine/fptrunc.ll +++ llvm/test/Transforms/InstCombine/fptrunc.ll @@ -182,8 +182,7 @@ define half @ItoFtoF_u25_f32_f16(i25 %i) { ; CHECK-LABEL: @ItoFtoF_u25_f32_f16( -; CHECK-NEXT: [[X:%.*]] = uitofp i25 [[I:%.*]] to float -; CHECK-NEXT: [[R:%.*]] = fptrunc float [[X]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp i25 [[I:%.*]] to half ; CHECK-NEXT: ret half [[R]] ; %x = uitofp i25 %i to float Index: llvm/test/Transforms/InstCombine/sitofp.ll =================================================================== --- llvm/test/Transforms/InstCombine/sitofp.ll +++ llvm/test/Transforms/InstCombine/sitofp.ll @@ -218,16 +218,38 @@ ret i55 %C } -; TODO: The mask guarantees that the input is small enough to eliminate the FP casts. +; The mask guarantees that the input is small enough to eliminate the FP casts. 
define i25 @masked_input(i25 %A) { ; CHECK-LABEL: @masked_input( ; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], 65535 +; CHECK-NEXT: ret i25 [[M]] +; + %m = and i25 %A, 65535 + %B = uitofp i25 %m to float + %C = fptoui float %B to i25 + ret i25 %C +} + +define i25 @max_masked_input(i25 %A) { +; CHECK-LABEL: @max_masked_input( +; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], 16777215 +; CHECK-NEXT: ret i25 [[M]] +; + %m = and i25 %A, 16777215 ; max intermediate 16777215 (= (1 << 24) - 1) + %B = uitofp i25 %m to float + %C = fptoui float %B to i25 + ret i25 %C +} + +define i25 @overflow_masked_input(i25 %A) { +; CHECK-LABEL: @overflow_masked_input( +; CHECK-NEXT: [[M:%.*]] = and i25 [[A:%.*]], -16777216 ; CHECK-NEXT: [[B:%.*]] = uitofp i25 [[M]] to float ; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i25 ; CHECK-NEXT: ret i25 [[C]] ; - %m = and i25 %A, 65535 + %m = and i25 %A, 16777216 ; Negative test - intermediate 16777216 (= 1 << 24) %B = uitofp i25 %m to float %C = fptoui float %B to i25 ret i25 %C