diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -613,6 +613,9 @@ /// cost should return false, otherwise return true. bool isNumRegsMajorCostOfLSR() const; + /// Return true if LSR may replace the primary IV with another IV. + bool isAllowTerminatingConditionFoldingAfterLSR() const; + /// \returns true if LSR should not optimize a chain that includes \p I. bool isProfitableLSRChainElement(Instruction *I) const; @@ -1622,6 +1625,7 @@ virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) = 0; virtual bool isNumRegsMajorCostOfLSR() = 0; + virtual bool isAllowTerminatingConditionFoldingAfterLSR() const = 0; virtual bool isProfitableLSRChainElement(Instruction *I) = 0; virtual bool canMacroFuseCmp() = 0; virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, @@ -2035,6 +2039,9 @@ bool isNumRegsMajorCostOfLSR() override { return Impl.isNumRegsMajorCostOfLSR(); } + bool isAllowTerminatingConditionFoldingAfterLSR() const override { + return Impl.isAllowTerminatingConditionFoldingAfterLSR(); + } bool isProfitableLSRChainElement(Instruction *I) override { return Impl.isProfitableLSRChainElement(I); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -220,6 +220,8 @@ bool isNumRegsMajorCostOfLSR() const { return true; } + bool isAllowTerminatingConditionFoldingAfterLSR() const { return false; } + bool isProfitableLSRChainElement(Instruction *I) const { return false; } bool canMacroFuseCmp() const { return false; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- 
a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -356,6 +356,11 @@ return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR(); } + bool isAllowTerminatingConditionFoldingAfterLSR() const { + return TargetTransformInfoImplBase:: + isAllowTerminatingConditionFoldingAfterLSR(); + } + bool isProfitableLSRChainElement(Instruction *I) { return TargetTransformInfoImplBase::isProfitableLSRChainElement(I); } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -364,6 +364,10 @@ return TTIImpl->isNumRegsMajorCostOfLSR(); } +bool TargetTransformInfo::isAllowTerminatingConditionFoldingAfterLSR() const { + return TTIImpl->isAllowTerminatingConditionFoldingAfterLSR(); +} + bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { return TTIImpl->isProfitableLSRChainElement(I); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -308,6 +308,8 @@ } llvm_unreachable("unknown register class"); } + + bool isAllowTerminatingConditionFoldingAfterLSR() const; }; } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -968,3 +968,7 @@ // TODO: Figure out constant materialization cost modeling and remove. 
return SLPMaxVF; } + +bool RISCVTTIImpl::isAllowTerminatingConditionFoldingAfterLSR() const { + return true; +} diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6771,7 +6771,8 @@ } } - if (AllowTerminatingConditionFoldingAfterLSR) { + if (AllowTerminatingConditionFoldingAfterLSR || + TTI.isAllowTerminatingConditionFoldingAfterLSR()) { auto CanFoldTerminatingCondition = canFoldTermCondOfLoop(L, SE, DT, LI); if (CanFoldTerminatingCondition) { BasicBlock *LoopPreheader = L->getLoopPreheader(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -778,19 +778,18 @@ ; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: add a6, a1, a6 -; CHECK-NEXT: li a7, 32 -; CHECK-NEXT: li t0, 5 -; CHECK-NEXT: mv t1, a5 +; CHECK-NEXT: add a7, a3, a0 +; CHECK-NEXT: li t0, 32 +; CHECK-NEXT: li t1, 5 ; CHECK-NEXT: .LBB12_3: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a7, e8, m1, ta, mu -; CHECK-NEXT: vlse8.v v8, (a6), t0 +; CHECK-NEXT: vsetvli zero, t0, e8, m1, ta, mu +; CHECK-NEXT: vlse8.v v8, (a6), t1 ; CHECK-NEXT: vle8.v v9, (a2) ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: vse8.v v8, (a2) -; CHECK-NEXT: addi t1, t1, -32 ; CHECK-NEXT: addi a2, a2, 32 ; CHECK-NEXT: addi a6, a6, 160 -; CHECK-NEXT: bnez t1, .LBB12_3 +; CHECK-NEXT: bne a2, a7, .LBB12_3 ; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: beq a4, a5, .LBB12_7 ; CHECK-NEXT: .LBB12_5: diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -283,17 +283,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB7_7 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB7_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: mulw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB7_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: mulw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB7_6 ; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -376,17 +376,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB8_7 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB8_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: addw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB8_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: addw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB8_6 ; CHECK-NEXT: .LBB8_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -469,17 +469,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB9_7 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB9_6: # 
%for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: addw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB9_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: addw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB9_6 ; CHECK-NEXT: .LBB9_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -562,17 +562,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB10_7 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB10_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: subw a2, a1, a2 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB10_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: subw a3, a1, a3 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB10_6 ; CHECK-NEXT: .LBB10_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -655,17 +655,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB11_7 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB11_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: and a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB11_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: and a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB11_6 ; 
CHECK-NEXT: .LBB11_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -748,17 +748,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB12_7 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB12_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: or a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB12_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: or a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB12_6 ; CHECK-NEXT: .LBB12_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -841,17 +841,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB13_7 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB13_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: xor a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB13_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: xor a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB13_6 ; CHECK-NEXT: .LBB13_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1042,17 +1042,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB17_7 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; 
CHECK-NEXT: .LBB17_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: sllw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB17_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: sllw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB17_6 ; CHECK-NEXT: .LBB17_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1135,17 +1135,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB18_7 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB18_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: srlw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB18_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: srlw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB18_6 ; CHECK-NEXT: .LBB18_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1228,17 +1228,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a3, .LBB19_7 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB19_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a1, 0(a0) -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: sw a1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB19_6 +; CHECK-NEXT: lw a2, 0(a1) +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: sw a2, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; 
CHECK-NEXT: bne a1, a0, .LBB19_6 ; CHECK-NEXT: .LBB19_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1536,17 +1536,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB26_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fmul.s ft0, ft0, fa0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB26_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB26_6 ; CHECK-NEXT: .LBB26_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1628,17 +1628,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB27_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fdiv.s ft0, ft0, fa0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB27_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB27_6 ; CHECK-NEXT: .LBB27_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1720,17 +1720,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; 
CHECK-NEXT: .LBB28_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fdiv.s ft0, fa0, ft0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB28_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB28_6 ; CHECK-NEXT: .LBB28_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1812,17 +1812,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB29_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fadd.s ft0, ft0, fa0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB29_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB29_6 ; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1904,17 +1904,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB30_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fsub.s ft0, ft0, fa0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB30_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB30_6 ; CHECK-NEXT: .LBB30_7: # 
%for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1996,17 +1996,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB31_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft0, 0(a1) ; CHECK-NEXT: fsub.s ft0, fa0, ft0 -; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB31_6 +; CHECK-NEXT: fsw ft0, 0(a1) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: bne a1, a0, .LBB31_6 ; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2173,20 +2173,20 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a4, -1024 ; CHECK-NEXT: slli a3, a4, 2 -; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a2, a1, a3 ; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: .LBB34_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: flw ft0, 0(a0) -; CHECK-NEXT: flw ft1, 0(a1) +; CHECK-NEXT: flw ft1, 0(a2) ; CHECK-NEXT: fmadd.s ft0, ft0, fa0, ft1 ; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB34_6 +; CHECK-NEXT: bne a2, a1, .LBB34_6 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2276,20 +2276,20 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a4, -1024 ; CHECK-NEXT: slli a3, a4, 2 -; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a2, a1, a3 ; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: lui a3, 1 
+; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: .LBB35_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: flw ft0, 0(a0) -; CHECK-NEXT: flw ft1, 0(a1) +; CHECK-NEXT: flw ft1, 0(a2) ; CHECK-NEXT: fmadd.s ft0, fa0, ft0, ft1 ; CHECK-NEXT: fsw ft0, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a2, .LBB35_6 +; CHECK-NEXT: bne a2, a1, .LBB35_6 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2602,17 +2602,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB42_7 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB42_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: divuw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB42_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: divuw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB42_6 ; CHECK-NEXT: .LBB42_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2695,17 +2695,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB43_7 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB43_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: divw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB43_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: divw a3, a3, a1 +; CHECK-NEXT: 
sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB43_6 ; CHECK-NEXT: .LBB43_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2788,17 +2788,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB44_7 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB44_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: remuw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB44_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: remuw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB44_6 ; CHECK-NEXT: .LBB44_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2881,17 +2881,17 @@ ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB45_7 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB45_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: remw a2, a2, a1 -; CHECK-NEXT: sw a2, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bnez a3, .LBB45_6 +; CHECK-NEXT: lw a3, 0(a2) +; CHECK-NEXT: remw a3, a3, a1 +; CHECK-NEXT: sw a3, 0(a2) +; CHECK-NEXT: addi a2, a2, 4 +; CHECK-NEXT: bne a2, a0, .LBB45_6 ; CHECK-NEXT: .LBB45_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: