diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -79,6 +79,7 @@
 
     bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
     bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
+    bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
 
     bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
     bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
@@ -673,6 +674,55 @@
   return true;
 }
 
+/// Eliminate a redundant round trip through floating point.
+///
+/// When \p UseInst is a sitofp whose integer operand has a signed range that
+/// fits in the significand of the destination floating-point type, every
+/// fptosi user of \p UseInst that converts back to the operand's type is
+/// replaced by the operand itself and queued in DeadInsts.
+///
+/// \returns true if this call replaced at least one fptosi user.
+bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
+  if (UseInst->getOpcode() != CastInst::SIToFP)
+    return false;
+
+  // getFPMantissaWidth() returns -1 for types without a fixed-width mantissa
+  // (e.g. ppc_fp128). Keep the value signed and bail out; stored in an
+  // unsigned, the -1 would wrap and make the range check below always pass.
+  int DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+  if (DestNumSigBits <= 0)
+    return false;
+
+  Value *IVOperand = UseInst->getOperand(0);
+  // The round trip is lossless only if the whole signed range of the integer
+  // operand fits in the significand of the floating-point type.
+  ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand));
+  if (IVRange.getActiveBits() > static_cast<unsigned>(DestNumSigBits))
+    return false;
+
+  // Report only what this call did: the Changed flag may already be set by
+  // an earlier simplification, which would make the caller re-queue users
+  // and skip its remaining checks even though nothing was replaced here.
+  bool Replaced = false;
+  for (User *U : UseInst->users()) {
+    // Match for fptosi of sitofp and with same type.
+    auto *CI = dyn_cast<FPToSIInst>(U);
+    if (!CI || IVOperand->getType() != CI->getType())
+      continue;
+
+    CI->replaceAllUsesWith(IVOperand);
+    DeadInsts.push_back(CI);
+    LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
+                      << " with: " << *IVOperand << '\n');
+
+    ++NumFoldedUser;
+    Replaced = true;
+  }
+
+  Changed |= Replaced;
+  return Replaced;
+}
+
 /// Eliminate any operation that SCEV can prove is an identity function.
 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
                                            Instruction *IVOperand) {
@@ -896,6 +946,13 @@
       }
     }
 
+    // Try to use integer induction for FPToSI of float induction directly.
+    if (replaceFloatIVWithIntegerIV(UseInst)) {
+      // Re-queue the potentially new direct uses of IVOperand.
+      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+      continue;
+    }
+
     CastInst *Cast = dyn_cast<CastInst>(UseInst);
     if (V && Cast) {
       V->visitCast(Cast);
diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
--- a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -382,8 +382,7 @@
 ; CHECK-NEXT:    [[FLOAT_IV_INT:%.*]] = phi i32 [ 1000, [[ENTRY:%.*]] ], [ [[FLOAT_IV_NEXT_INT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[FLOAT_IV_INT]] to float
 ; CHECK-NEXT:    call void @use.float(float [[INDVAR_CONV]])
-; CHECK-NEXT:    [[CONV_I32:%.*]] = fptosi float [[INDVAR_CONV]] to i32
-; CHECK-NEXT:    call void @use.i32(i32 [[CONV_I32]])
+; CHECK-NEXT:    call void @use.i32(i32 [[FLOAT_IV_INT]])
 ; CHECK-NEXT:    [[CONV_I16:%.*]] = fptosi float [[INDVAR_CONV]] to i16
 ; CHECK-NEXT:    [[CONV_I64:%.*]] = fptosi float [[INDVAR_CONV]] to i64
 ; CHECK-NEXT:    call void @use.i16(i16 [[CONV_I16]])
diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@array = dso_local global [16777219 x i32] zeroinitializer, align 4
+
+define void @small_const_bound(i32 %index) {
+; CHECK-LABEL: @small_const_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IV_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[IV_INT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i32
+  %idxprom = sext i32 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom
+  store i32 %conv, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Negative test: The transform is *not* valid because there are too many significant bits
+define void @overflow_masked_const_bound(i32 %index) {
+; CHECK-LABEL: @overflow_masked_const_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 16777218, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to float
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[INDVAR_CONV]] to i32
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[CONV]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[CONV]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 16777218, %entry ], [ %dec.int, %for.body ] ; start value 16777218 = (1 << 24) + 2 needs 25 bits, more than float's 24-bit significand
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i32
+  %idxprom = sext i32 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom
+  store i32 %conv, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Negative test: Type mismatch between the integer IV and the fptosi result
+define void @mismatch_type_const(i32 %index) {
+;
+; CHECK-LABEL: @mismatch_type_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to float
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[INDVAR_CONV]] to i16
+; CHECK-NEXT:    [[IDXPROM32:%.*]] = sext i16 [[CONV]] to i32
+; CHECK-NEXT:    [[IDXPROM64:%.*]] = sext i16 [[CONV]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM64]]
+; CHECK-NEXT:    store i32 [[IDXPROM32]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i16
+  %idxprom32 = sext i16 %conv to i32
+  %idxprom64 = sext i16 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}