Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -392,9 +392,45 @@
   //
   // We give preference to sitofp over uitofp because it is faster on most
   // platforms.
+  assert(isInt<32>(InitValue) && isInt<32>(IncValue) && isInt<32>(ExitValue) &&
+         "Integer value of a non-constant number");
   if (WeakPH) {
     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                  &*PN->getParent()->getFirstInsertionPt());
+
+    // NewPHI is currently always i32. Integers whose magnitude is at most
+    // 2^DestNumSigBits are exactly representable in PN's type, so if both
+    // abs(InitValue) and abs(ExitValue) are within that bound, fptosi/fptoui
+    // users of PN that produce NewPHI's type can use NewPHI directly.
+    // TODO: make use of the low zero bits of IncValue and ExitValue.
+    int DestNumSigBits = PN->getType()->getFPMantissaWidth();
+    auto FitsInMantissa = [DestNumSigBits](int64_t V) {
+      // getFPMantissaWidth() is -1 when the type has no stable mantissa.
+      if (DestNumSigBits <= 0)
+        return false;
+      // Any int64_t magnitude is at most 2^63; also keeps the shift defined.
+      if (DestNumSigBits >= 63)
+        return true;
+      return std::abs(V) <= (int64_t(1) << DestNumSigBits);
+    };
+    if (FitsInMantissa(InitValue) && FitsInMantissa(ExitValue)) {
+      for (User *U : PN->users()) {
+        auto *CI = dyn_cast<CastInst>(U);
+        if (!CI)
+          continue;
+        // Other casts of PN (fpext, fptrunc, bitcast, ...) and conversions to
+        // a type other than NewPHI's cannot take NewPHI; they get Conv below.
+        CastInst::CastOps Opcode = CI->getOpcode();
+        if ((Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI) ||
+            CI->getType() != NewPHI->getType())
+          continue;
+        CI->replaceUsesWithIf(NewPHI, [&](Use &CastUse) -> bool {
+          return DT->dominates(NewPHI, CastUse);
+        });
+      }
+    }
+
+    // Use Conv to replace the remaining uses that were not rewritten above.
     PN->replaceAllUsesWith(Conv);
     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
   }
Index: llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -adce -S | FileCheck %s
+
+@array = dso_local global [101 x i32] zeroinitializer, align 4
+
+define void @small_const_bound(i32 %index) {
+; CHECK-LABEL: @small_const_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_010_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_010_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[I_010_INT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[I_010_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.010 = phi float [ 1.000000e+02, %entry ], [ %dec, %for.body ]
+  %conv = fptosi float %i.010 to i32
+  %idxprom = sext i32 %conv to i64
+  %arrayidx = getelementptr inbounds [101 x i32], [101 x i32]* @array, i64 0, i64 %idxprom
+  store i32 %conv, i32* %arrayidx, align 4
+  %dec = fadd fast float %i.010, -1.000000e+00
+  %cmp = fcmp fast ogt float %dec, 0.000000e+00
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+