Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -27,6 +27,10 @@ #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/AssumptionCache.h" #include namespace llvm { @@ -74,6 +78,25 @@ Ordering == AtomicOrdering::Unordered) && !IsVolatile; } }; +/// Attributes of a target dependent hardware loop. +struct HardwareLoopInfo { + HardwareLoopInfo() = delete; + HardwareLoopInfo(Loop *L) : L(L) {} + Loop *L = nullptr; + BasicBlock *ExitBlock = nullptr; + BranchInst *ExitBranch = nullptr; + const SCEV *ExitCount = nullptr; + IntegerType *CountType = nullptr; + Value *LoopDecrement = nullptr; // Decrement the loop counter by this + // value in every iteration. + bool IsNestingLegal = false; // Can a hardware loop be a parent to + // another hardware loop? + bool CounterInReg = false; // Should loop counter be updated in + // the loop via a phi? + bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, + DominatorTree &DT, bool ForceNestedLoop = false, + bool ForceHardwareLoopPHI = false); +}; /// This pass provides access to the codegen interfaces that are needed /// for IR-level transformations. @@ -448,23 +471,6 @@ void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const; - /// Attributes of a target dependent hardware loop. - struct HardwareLoopInfo { - HardwareLoopInfo() = delete; - HardwareLoopInfo(Loop *L) : L(L) { } - Loop *L = nullptr; - BasicBlock *ExitBlock = nullptr; - BranchInst *ExitBranch = nullptr; - const SCEV *ExitCount = nullptr; - IntegerType *CountType = nullptr; - Value *LoopDecrement = nullptr; // Decrement the loop counter by this - // value in every iteration. - bool IsNestingLegal = false; // Can a hardware loop be a parent to - // another hardware loop? - bool CounterInReg = false; // Should loop counter be updated in - // the loop via a phi? - }; - /// Query the target whether it would be profitable to convert the given loop /// into a hardware loop. bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -518,6 +524,12 @@ /// calculation for the instructions in a loop. bool canMacroFuseCmp() const; + /// Return true if the target can save a compare for loop count, for example + /// hardware loop saves a compare. + bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) const; + /// \return True is LSR should make efforts to create/preserve post-inc /// addressing mode expressions. bool shouldFavorPostInc() const; @@ -1124,6 +1136,9 @@ virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) = 0; virtual bool canMacroFuseCmp() = 0; + virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) = 0; virtual bool shouldFavorPostInc() const = 0; virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; virtual bool isLegalMaskedStore(Type *DataType) = 0; @@ -1374,6 +1389,13 @@ bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } + + bool canSaveCmp(Loop *L, BranchInst **BI, + ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) override { + return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); + } bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); } Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -193,7 +193,7 @@ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) { return false; } @@ -221,6 +221,12 @@ bool canMacroFuseCmp() { return false; } + bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) { + return false; + } + bool shouldFavorPostInc() const { return false; } bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -495,7 +495,7 @@ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) { return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -40,6 +40,90 @@ }; } +bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, + LoopInfo &LI, DominatorTree &DT, + bool ForceNestedLoop, + bool ForceHardwareLoopPHI) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), + IE = ExitingBlocks.end(); + I != IE; ++I) { + BasicBlock *BB = *I; + + // If we pass the updated counter back through a phi, we need to know + // which latch the updated value will be coming from. + if (!L->isLoopLatch(BB)) + if (ForceHardwareLoopPHI || CounterInReg) + continue; + + const SCEV *EC = SE.getExitCount(L, BB); + if (isa(EC)) + continue; + if (const SCEVConstant *ConstEC = dyn_cast(EC)) { + if (ConstEC->getValue()->isZero()) + continue; + } else if (!SE.isLoopInvariant(EC, L)) + continue; + + if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth()) + continue; + + // If this exiting block is contained in a nested loop, it is not eligible + // for insertion of the branch-and-decrement since the inner loop would + // end up messing up the value in the CTR. + if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop) + continue; + + // We now have a loop-invariant count of loop iterations (which is not the + // constant zero) for which we know that this loop will not exit via this + // existing block. + + // We need to make sure that this block will run on every loop iteration. + // For this to be true, we must dominate all blocks with backedges. Such + // blocks are in-loop predecessors to the header block. + bool NotAlways = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PIE = pred_end(L->getHeader()); + PI != PIE; ++PI) { + if (!L->contains(*PI)) + continue; + + if (!DT.dominates(*I, *PI)) { + NotAlways = true; + break; + } + } + + if (NotAlways) + continue; + + // Make sure this blocks ends with a conditional branch. + Instruction *TI = BB->getTerminator(); + if (!TI) + continue; + + if (BranchInst *BI = dyn_cast(TI)) { + if (!BI->isConditional()) + continue; + + ExitBranch = BI; + } else + continue; + + // Note that this block may not be the loop latch block, even if the loop + // has a latch block. + ExitBlock = *I; + ExitCount = EC; + break; + } + + if (!ExitBlock) + return false; + return true; +} + TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model(NoTTIImpl(DL))) {} @@ -167,6 +251,13 @@ return TTIImpl->canMacroFuseCmp(); } +bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI, + ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo) const { + return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); +} + bool TargetTransformInfo::shouldFavorPostInc() const { return TTIImpl->shouldFavorPostInc(); } Index: llvm/lib/CodeGen/HardwareLoops.cpp =================================================================== --- llvm/lib/CodeGen/HardwareLoops.cpp +++ llvm/lib/CodeGen/HardwareLoops.cpp @@ -101,7 +101,7 @@ // Given that the target believes the loop to be profitable, try to // convert it. - bool TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo); + bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo); private: ScalarEvolution *SE = nullptr; @@ -139,7 +139,7 @@ void UpdateBranch(Value *EltsRem); public: - HardwareLoop(TTI::HardwareLoopInfo &Info, ScalarEvolution &SE, + HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, const DataLayout &DL) : SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), @@ -205,7 +205,7 @@ if (containsIrreducibleCFG(RPOT, *LI)) return false; - TTI::HardwareLoopInfo HWLoopInfo(L); + HardwareLoopInfo HWLoopInfo(L); if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || ForceHardwareLoops) { @@ -225,91 +225,18 @@ return false; } -bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) { +bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { Loop *L = HWLoopInfo.L; LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); - - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); I != IE; ++I) { - BasicBlock *BB = *I; - - // If we pass the updated counter back through a phi, we need to know - // which latch the updated value will be coming from. - if (!L->isLoopLatch(BB)) { - if ((ForceHardwareLoopPHI.getNumOccurrences() && ForceHardwareLoopPHI) || - HWLoopInfo.CounterInReg) - continue; - } - - const SCEV *EC = SE->getExitCount(L, BB); - if (isa(EC)) - continue; - if (const SCEVConstant *ConstEC = dyn_cast(EC)) { - if (ConstEC->getValue()->isZero()) - continue; - } else if (!SE->isLoopInvariant(EC, L)) - continue; - - if (SE->getTypeSizeInBits(EC->getType()) > - HWLoopInfo.CountType->getBitWidth()) - continue; - - // If this exiting block is contained in a nested loop, it is not eligible - // for insertion of the branch-and-decrement since the inner loop would - // end up messing up the value in the CTR. - if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(BB) != L && - !ForceNestedLoop) - continue; - - // We now have a loop-invariant count of loop iterations (which is not the - // constant zero) for which we know that this loop will not exit via this - // existing block. - - // We need to make sure that this block will run on every loop iteration. - // For this to be true, we must dominate all blocks with backedges. Such - // blocks are in-loop predecessors to the header block. - bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { - if (!L->contains(*PI)) - continue; - - if (!DT->dominates(*I, *PI)) { - NotAlways = true; - break; - } - } - - if (NotAlways) - continue; - - // Make sure this blocks ends with a conditional branch. - Instruction *TI = BB->getTerminator(); - if (!TI) - continue; - - if (BranchInst *BI = dyn_cast(TI)) { - if (!BI->isConditional()) - continue; - - HWLoopInfo.ExitBranch = BI; - } else - continue; - - // Note that this block may not be the loop latch block, even if the loop - // has a latch block. - HWLoopInfo.ExitBlock = *I; - HWLoopInfo.ExitCount = EC; - break; - } - - if (!HWLoopInfo.ExitBlock) + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, + ForceHardwareLoopPHI)) return false; + assert( + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && + "Hardware Loop must have set exit info."); + BasicBlock *Preheader = L->getLoopPreheader(); // If we don't have a preheader, then insert one. Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -184,7 +184,7 @@ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo); + HardwareLoopInfo &HWLoopInfo); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -696,7 +696,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) { // Low-overhead branches are only supported in the 'low-overhead branch' // extension of v8.1-m. if (!ST->hasLOB() || DisableLowOverheadLoops) Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -56,7 +56,10 @@ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo); + HardwareLoopInfo &HWLoopInfo); + bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *LibInfo); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "PPCTargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" @@ -15,6 +16,7 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Analysis/LoopIterator.h" using namespace llvm; #define DEBUG_TYPE "ppctti" @@ -492,7 +494,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - TTI::HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) { const PPCTargetMachine &TM = ST->getTargetMachine(); TargetSchedModel SchedModel; SchedModel.init(ST); @@ -880,3 +882,28 @@ return Cost; } +bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, + AssumptionCache *AC, TargetLibraryInfo *LibInfo) { + // Process nested loops first. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo)) + return false; // Stop search. + + // Bail out if the loop has irreducible control flow. + LoopBlocksRPO RPOT(L); + RPOT.perform(LI); + if (containsIrreducibleCFG(RPOT, *LI)) + return false; + + HardwareLoopInfo HWLoopInfo(L); + + if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) + return false; + + if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) + return false; + + *BI = HWLoopInfo.ExitBranch; + return true; +} Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1909,6 +1909,8 @@ ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; + AssumptionCache &AC; + TargetLibraryInfo &LibInfo; const TargetTransformInfo &TTI; Loop *const L; bool FavorBackedgeIndex = false; @@ -2047,7 +2049,8 @@ public: LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, - LoopInfo &LI, const TargetTransformInfo &TTI); + LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, + TargetLibraryInfo &LibInfo); bool getChanged() const { return Changed; } @@ -3232,6 +3235,8 @@ } void LSRInstance::CollectFixupsAndInitialFormulae() { + BranchInst *ExitBranch = nullptr; + for (const IVStrideUse &U : IU) { Instruction *UserInst = U.getUser(); // Skip IV users that are part of profitable IV Chains. @@ -3261,6 +3266,14 @@ // equality icmps, thanks to IndVarSimplify. if (ICmpInst *CI = dyn_cast(UserInst)) if (CI->isEquality()) { + // If CI can be saved in some target, like replaced inside hardware loop + // in PowerPC, no need to generate initial formulae for it. + bool saveCmp = false; + if (!ExitBranch) + saveCmp = + TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &LibInfo); + if (saveCmp && CI == cast(ExitBranch->getCondition())) + continue; // Swap the operands if needed to put the OperandValToReplace on the // left, for consistency. Value *NV = CI->getOperand(1); @@ -5479,8 +5492,9 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, - const TargetTransformInfo &TTI) - : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), + const TargetTransformInfo &TTI, AssumptionCache &AC, + TargetLibraryInfo &LibInfo) + : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), LibInfo(LibInfo), TTI(TTI), L(L), FavorBackedgeIndex(EnableBackedgeIndexing && TTI.shouldFavorBackedgeIndex(L)) { // If LoopSimplify form is not available, stay out of trouble. @@ -5677,6 +5691,8 @@ AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); // Requiring LoopSimplify a second time here prevents IVUsers from running // twice, since LoopSimplify was invalidated by running ScalarEvolution. AU.addRequiredID(LoopSimplifyID); @@ -5687,11 +5703,14 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + AssumptionCache &AC, + TargetLibraryInfo &LibInfo) { + bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged(); + Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, LibInfo).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -5722,14 +5741,17 @@ auto &LI = getAnalysis().getLoopInfo(); const auto &TTI = getAnalysis().getTTI( *L->getHeader()->getParent()); - return ReduceLoopStrength(L, IU, SE, DT, LI, TTI); + auto &AC = getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); + auto &LibInfo = getAnalysis().getTLI(); + return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo); } PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { if (!ReduceLoopStrength(&L, AM.getResult(L, AR), AR.SE, - AR.DT, AR.LI, AR.TTI)) + AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); Index: llvm/test/CodeGen/PowerPC/addi-licm.ll =================================================================== --- llvm/test/CodeGen/PowerPC/addi-licm.ll +++ llvm/test/CodeGen/PowerPC/addi-licm.ll @@ -18,8 +18,8 @@ ; CHECK: addi [[REG1:[0-9]+]], 1, ; CHECK: addi [[REG2:[0-9]+]], 1, ; CHECK: %for.body.i -; CHECK-DAG: lfsx {{[0-9]+}}, [[REG1]], -; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]], +; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG1]]) +; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG2]]) ; CHECK: blr ; PIP-LABEL: @foo Index: llvm/test/CodeGen/PowerPC/ctrloop-ne.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-ne.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-ne.ll @@ -32,8 +32,7 @@ ; CHECK: test_pos2_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -62,8 +61,7 @@ ; CHECK: test_pos4_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -92,8 +90,7 @@ ; CHECK: test_pos8_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -122,8 +119,7 @@ ; CHECK: test_pos16_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -181,8 +177,7 @@ ; CHECK: test_pos2_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -211,8 +206,7 @@ ; CHECK: test_pos4_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -241,8 +235,7 @@ ; CHECK: test_pos8_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -271,8 +264,7 @@ ; CHECK: test_pos16_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -330,8 +322,7 @@ ; CHECK: test_pos2_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -360,8 +351,7 @@ ; CHECK: test_pos4_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -390,8 +380,7 @@ ; CHECK: test_pos8_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -420,8 +409,7 @@ ; CHECK: test_pos16_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: Index: llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll @@ -86,10 +86,12 @@ } ; Function Attrs: norecurse nounwind +; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8. +; a2q should use mtctr, but pwr8 should not use mtctr. define signext i32 @testTripCount2NonSmallLoop() { ; CHECK-LABEL: testTripCount2NonSmallLoop: -; CHECK: blt -; CHECK: beq +; CHECK-A2Q: mtctr +; CHECK-PWR8-NOT: mtctr ; CHECK: blr entry: Index: llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll =================================================================== --- llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll +++ llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll @@ -14,32 +14,31 @@ define void @foo(float* nocapture %data, float %d) { ; CHECK-LABEL: foo: ; CHECK: .LBB0_1: # %vector.body -; CHECK: add 5, 3, 4 -; CHECK-NEXT: stxvx 0, 3, 4 +; CHECK: stxv 0, -192(4) +; CHECK-NEXT: stxv 0, -176(4) +; CHECK-NEXT: stxv 0, -160(4) +; CHECK-NEXT: stxv 0, -144(4) +; CHECK-NEXT: stxv 0, -128(4) +; CHECK-NEXT: stxv 0, -112(4) +; CHECK-NEXT: stxv 0, -96(4) +; CHECK-NEXT: stxv 0, -80(4) +; CHECK-NEXT: stxv 0, -64(4) +; CHECK-NEXT: stxv 0, -48(4) +; CHECK-NEXT: stxv 0, -32(4) +; CHECK-NEXT: stxv 0, -16(4) +; CHECK-NEXT: stxv 0, 0(4) +; CHECK-NEXT: stxv 0, 16(4) +; CHECK-NEXT: stxv 0, 32(4) +; CHECK-NEXT: stxv 0, 48(4) +; CHECK-NEXT: stxv 0, 64(4) +; CHECK-NEXT: stxv 0, 80(4) +; CHECK-NEXT: stxv 0, 96(4) +; CHECK-NEXT: stxv 0, 112(4) +; CHECK-NEXT: stxv 0, 128(4) +; CHECK-NEXT: stxv 0, 144(4) +; CHECK-NEXT: stxv 0, 160(4) +; CHECK-NEXT: stxv 0, 176(4) ; CHECK-NEXT: addi 4, 4, 384 -; CHECK-NEXT: stxv 0, 16(5) -; CHECK-NEXT: stxv 0, 32(5) -; CHECK-NEXT: stxv 0, 48(5) -; CHECK-NEXT: stxv 0, 64(5) -; CHECK-NEXT: stxv 0, 80(5) -; CHECK-NEXT: stxv 0, 96(5) -; CHECK-NEXT: stxv 0, 112(5) -; CHECK-NEXT: stxv 0, 128(5) -; CHECK-NEXT: stxv 0, 144(5) -; CHECK-NEXT: stxv 0, 160(5) -; CHECK-NEXT: stxv 0, 176(5) -; CHECK-NEXT: stxv 0, 192(5) -; CHECK-NEXT: stxv 0, 208(5) -; CHECK-NEXT: stxv 0, 224(5) -; CHECK-NEXT: stxv 0, 240(5) -; CHECK-NEXT: stxv 0, 256(5) -; CHECK-NEXT: stxv 0, 272(5) -; CHECK-NEXT: stxv 0, 288(5) -; CHECK-NEXT: stxv 0, 304(5) -; CHECK-NEXT: stxv 0, 320(5) -; CHECK-NEXT: stxv 0, 336(5) -; CHECK-NEXT: stxv 0, 352(5) -; CHECK-NEXT: stxv 0, 368(5) ; CHECK-NEXT: bdnz .LBB0_1 entry: Index: llvm/test/CodeGen/PowerPC/negctr.ll =================================================================== --- llvm/test/CodeGen/PowerPC/negctr.ll +++ llvm/test/CodeGen/PowerPC/negctr.ll @@ -35,10 +35,12 @@ %exitcond = icmp eq i64 %indvars.iv.next, 0 br i1 %exitcond, label %for.end, label %for.body +; FIXME: This should be a hardware loop. ; CHECK: @main1 -; CHECK: li [[REG:[0-9]+]], -1 -; CHECK: mtctr [[REG]] -; CHECK: bdnz +; CHECK: li [[REG:[0-9]+]], 1 +; CHECK: addi [[REG2:[0-9]+]], [[REG]], 1 +; CHECK: cmpld +; CHECK: bge for.end: ; preds = %for.body, %entry ret void Index: llvm/test/CodeGen/PowerPC/stwu-sched.ll =================================================================== --- llvm/test/CodeGen/PowerPC/stwu-sched.ll +++ llvm/test/CodeGen/PowerPC/stwu-sched.ll @@ -1,9 +1,9 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s \ -; RUN: --check-prefix=CHECK-ITIN -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s \ -; RUN: --check-prefix=CHECK-ITIN +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-ctrloops < %s -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-ITIN +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-ctrloops < %s -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-ITIN %0 = type { i32, i32 } @@ -12,11 +12,11 @@ define void @initCombList(%0* nocapture, i32 signext) local_unnamed_addr #0 { ; CHECK-LABEL: initCombList: ; CHECK: addi 4, 4, -8 -; CHECK: stwu 5, 64(3) +; CHECK: stwu [[REG:[0-9]+]], 64(3) ; CHECK-ITIN-LABEL: initCombList: -; CHECK-ITIN: stwu 5, 64(4) -; CHECK-ITIN-NEXT: addi 3, 3, -8 +; CHECK-ITIN: stwu [[REG:[0-9]+]], 64(3) +; CHECK-ITIN-NEXT: addi [[REG2:[0-9]+]], [[REG2]], 8 %3 = zext i32 %1 to i64 Index: llvm/test/CodeGen/PowerPC/unal-altivec.ll =================================================================== --- llvm/test/CodeGen/PowerPC/unal-altivec.ll +++ llvm/test/CodeGen/PowerPC/unal-altivec.ll @@ -29,15 +29,14 @@ br i1 %10, label %for.end, label %vector.body ; CHECK: @foo -; CHECK-DAG: li [[C0:[0-9]+]], 0 +; CHECK-DAG: li [[C16:[0-9]+]], 16 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: -; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] -; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], -; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] -; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] +; CHECK-DAG: lvx [[LD1:[0-9]+]], 0, [[C0:[0-9]+]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], [[C0]], [[C16]] +; CHECK-DAG: lvsl [[MASK1:[0-9]+]], 0, [[C0]] +; CHECK-DAG: vperm [[VR1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] +; CHECK-DAG: vaddfp {{[0-9]+}}, [[VR1]], [[CNST]] ; CHECK: blr for.end: ; preds = %vector.body