diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -613,6 +613,11 @@ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const; + /// Return true if LSR major cost is register number. Targets which implement + /// their own isLSRCostLess and unset register number as major cost should + /// return false, otherwise return true. + bool isRegNumMajorCostOfLSR() const; + /// \returns true if LSR should not optimize a chain that includes \p I. bool isProfitableLSRChainElement(Instruction *I) const; @@ -1410,6 +1415,7 @@ Instruction *I) = 0; virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) = 0; + virtual bool isRegNumMajorCostOfLSR() = 0; virtual bool isProfitableLSRChainElement(Instruction *I) = 0; virtual bool canMacroFuseCmp() = 0; virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, @@ -1731,6 +1737,9 @@ TargetTransformInfo::LSRCost &C2) override { return Impl.isLSRCostLess(C1, C2); } + bool isRegNumMajorCostOfLSR() override { + return Impl.isRegNumMajorCostOfLSR(); + } bool isProfitableLSRChainElement(Instruction *I) override { return Impl.isProfitableLSRChainElement(I); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -192,6 +192,8 @@ C2.ScaleCost, C2.ImmCost, C2.SetupCost); } + bool isRegNumMajorCostOfLSR() { return true; } + bool isProfitableLSRChainElement(Instruction *I) { return false; } bool canMacroFuseCmp() { return false; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -264,6 +264,10 @@ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } + bool isRegNumMajorCostOfLSR() { + return TargetTransformInfoImplBase::isRegNumMajorCostOfLSR(); + } + bool isProfitableLSRChainElement(Instruction *I) { return TargetTransformInfoImplBase::isProfitableLSRChainElement(I); } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -377,6 +377,10 @@ return TTIImpl->isLSRCostLess(C1, C2); } +bool TargetTransformInfo::isRegNumMajorCostOfLSR() const { + return TTIImpl->isRegNumMajorCostOfLSR(); +} + bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const { return TTIImpl->isProfitableLSRChainElement(I); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -75,6 +75,7 @@ TTI::PeelingPreferences &PP); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); + bool isRegNumMajorCostOfLSR(); /// @} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1204,6 +1204,10 @@ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } +bool PPCTTIImpl::isRegNumMajorCostOfLSR() { + return false; +} + bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { switch (Inst->getIntrinsicID()) { diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2857,13 +2857,20 @@ 
unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; - if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst)) - return true; - - for (const IVInc &Inc : Chain) { + // If any LSRUse in the chain is marked as profitable by target, mark this + // chain as profitable. + for (const IVInc &Inc : Chain.Incs) if (TTI.isProfitableLSRChainElement(Inc.UserInst)) return true; + // If register number is not the major cost, we cannot benefit from this + // profitable chain which is based on register number. + // FIXME: add profitable chain optimization for other kinds of major cost, for + // example instruction number. + if (!TTI.isRegNumMajorCostOfLSR()) + return false; + + for (const IVInc &Inc : Chain) { if (Inc.IncExpr->isZero()) continue; diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -16,85 +16,90 @@ ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: bge 0, .LBB0_6 ; CHECK-NEXT: # %bb.1: # %.preheader -; CHECK-NEXT: mulld 11, 8, 5 -; CHECK-NEXT: mulld 12, 9, 8 -; CHECK-NEXT: sldi 0, 10, 3 -; CHECK-NEXT: sldi 11, 11, 3 -; CHECK-NEXT: sldi 12, 12, 3 -; CHECK-NEXT: add 11, 3, 11 +; CHECK-NEXT: addi 30, 5, 1 +; CHECK-NEXT: addi 29, 5, 3 +; CHECK-NEXT: addi 28, 5, 2 +; CHECK-NEXT: mulld 12, 8, 5 +; CHECK-NEXT: addi 3, 3, 16 +; CHECK-NEXT: mulld 0, 9, 8 +; CHECK-NEXT: sldi 11, 10, 3 +; CHECK-NEXT: mulld 30, 8, 30 +; CHECK-NEXT: mulld 29, 8, 29 +; CHECK-NEXT: mulld 8, 8, 28 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 -; CHECK-NEXT: add 11, 11, 12 +; CHECK-NEXT: add 12, 12, 0 +; CHECK-NEXT: add 30, 30, 0 +; CHECK-NEXT: add 29, 29, 0 +; CHECK-NEXT: add 8, 8, 0 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bge 0, .LBB0_6 ; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_5 Depth 2 -; CHECK-NEXT: sub 
30, 5, 10 -; CHECK-NEXT: cmpd 6, 30 +; CHECK-NEXT: sub 28, 5, 10 +; CHECK-NEXT: cmpd 6, 28 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: addi 28, 5, 1 -; CHECK-NEXT: addi 27, 5, 2 -; CHECK-NEXT: addi 26, 5, 3 -; CHECK-NEXT: mulld 29, 5, 8 -; CHECK-NEXT: sldi 25, 6, 3 -; CHECK-NEXT: mulld 28, 28, 8 -; CHECK-NEXT: mulld 27, 27, 8 -; CHECK-NEXT: mulld 26, 26, 8 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 -; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfdx 0, 4, 25 -; CHECK-NEXT: lfdx 1, 11, 25 +; CHECK-NEXT: add 26, 6, 12 +; CHECK-NEXT: add 25, 6, 30 ; CHECK-NEXT: add 24, 6, 29 -; CHECK-NEXT: mr 23, 3 -; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: add 23, 6, 8 +; CHECK-NEXT: sldi 27, 6, 3 +; CHECK-NEXT: sldi 26, 26, 3 +; CHECK-NEXT: sldi 25, 25, 3 ; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: sldi 23, 23, 3 +; CHECK-NEXT: add 27, 4, 27 +; CHECK-NEXT: add 26, 3, 26 +; CHECK-NEXT: add 25, 3, 25 ; CHECK-NEXT: add 24, 3, 24 -; CHECK-NEXT: lfd 1, 8(24) +; CHECK-NEXT: add 23, 3, 23 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lfd 0, 0(27) +; CHECK-NEXT: lfd 1, -16(26) +; CHECK-NEXT: add 6, 6, 10 +; CHECK-NEXT: cmpd 6, 28 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(24) +; CHECK-NEXT: lfd 1, -8(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(24) -; CHECK-NEXT: add 24, 6, 28 -; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: lfd 1, 0(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfdux 1, 23, 24 -; CHECK-NEXT: add 24, 6, 27 -; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: add 26, 26, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(23) +; CHECK-NEXT: lfd 1, -16(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(23) +; CHECK-NEXT: lfd 1, -8(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(23) -; CHECK-NEXT: mr 23, 3 -; CHECK-NEXT: lfdux 2, 
23, 24 -; CHECK-NEXT: add 24, 6, 26 -; CHECK-NEXT: add 6, 6, 10 -; CHECK-NEXT: sldi 24, 24, 3 -; CHECK-NEXT: cmpd 6, 30 +; CHECK-NEXT: lfd 1, 0(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(23) -; CHECK-NEXT: xsadddp 0, 0, 2 +; CHECK-NEXT: lfd 1, 8(25) +; CHECK-NEXT: add 25, 25, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(23) +; CHECK-NEXT: lfd 1, -16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(23) -; CHECK-NEXT: mr 23, 3 +; CHECK-NEXT: lfd 1, -8(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfdux 1, 23, 24 +; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 8(23) +; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, -16(24) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(23) +; CHECK-NEXT: lfd 1, -8(24) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(23) +; CHECK-NEXT: lfd 1, 0(24) +; CHECK-NEXT: xsadddp 0, 0, 1 +; CHECK-NEXT: lfd 1, 8(24) +; CHECK-NEXT: add 24, 24, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfdx 0, 4, 25 -; CHECK-NEXT: add 25, 25, 0 +; CHECK-NEXT: stfd 0, 0(27) +; CHECK-NEXT: add 27, 27, 11 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: