diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -98,7 +98,7 @@ Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *ExitCount = nullptr; + const SCEV *TripCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -104,6 +104,7 @@ SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); + const SCEV *ExitCount = nullptr; for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. @@ -173,6 +174,29 @@ if (!ExitBlock) return false; + + if (ExitCount->getType()->isPointerTy()) { + TripCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); + return true; + } + + assert((SE.getTypeSizeInBits(ExitCount->getType()) <= + CountType->getBitWidth()) && + "Invalid loop count type!\n"); + // If ExitCount must be zero-extended and we can prove that ExitCount + 1 + // does not overflow, add 1 before extending so that SCEV can fold the +1 + // with a -1 already present in ExitCount. 
+ if ((ExitCount->getType() != CountType) && + SE.isLoopEntryGuardedByCond( + L, ICmpInst::ICMP_NE, + SE.getAddExpr(ExitCount, SE.getOne(ExitCount->getType())), + SE.getZero(ExitCount->getType()))) + TripCount = SE.getZeroExtendExpr( + SE.getAddExpr(ExitCount, SE.getOne(ExitCount->getType())), CountType); + else + TripCount = SE.getAddExpr(SE.getNoopOrZeroExtend(ExitCount, CountType), + SE.getOne(CountType)); + return true; } diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -184,15 +184,12 @@ public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL, - OptimizationRemarkEmitter *ORE) : - SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), - ExitCount(Info.ExitCount), - CountType(Info.CountType), - ExitBranch(Info.ExitBranch), - LoopDecrement(Info.LoopDecrement), - UsePHICounter(Info.CounterInReg), - UseLoopGuard(Info.PerformEntryTest) { } + const DataLayout &DL, OptimizationRemarkEmitter *ORE) + : SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), + TripCount(Info.TripCount), CountType(Info.CountType), + ExitBranch(Info.ExitBranch), LoopDecrement(Info.LoopDecrement), + UsePHICounter(Info.CounterInReg), + UseLoopGuard(Info.PerformEntryTest) {} void Create(); @@ -200,15 +197,15 @@ ScalarEvolution &SE; const DataLayout &DL; OptimizationRemarkEmitter *ORE = nullptr; - Loop *L = nullptr; - Module *M = nullptr; - const SCEV *ExitCount = nullptr; - Type *CountType = nullptr; - BranchInst *ExitBranch = nullptr; - Value *LoopDecrement = nullptr; - bool UsePHICounter = false; - bool UseLoopGuard = false; - BasicBlock *BeginBB = nullptr; + Loop *L = nullptr; + Module *M = nullptr; + const SCEV *TripCount = nullptr; + Type *CountType = nullptr; + BranchInst *ExitBranch = nullptr; + Value *LoopDecrement = nullptr; + bool UsePHICounter = false; + bool UseLoopGuard = false; + BasicBlock *BeginBB 
= nullptr; }; } @@ -296,7 +293,7 @@ } assert( - (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && "Hardware Loop must have set exit info."); BasicBlock *Preheader = L->getLoopPreheader(); @@ -381,18 +378,13 @@ // loop counter and tests that is not zero? SCEVExpander SCEVE(SE, DL, "loopcnt"); - if (!ExitCount->getType()->isPointerTy() && - ExitCount->getType() != CountType) - ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); - - ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. - if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, - SE.getZero(ExitCount->getType()))) { + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, + SE.getZero(TripCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); UseLoopGuard |= ForceGuardLoopEntry; } else @@ -404,20 +396,19 @@ BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) - UseLoopGuard = false; + if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) + UseLoopGuard = false; else - BB = Predecessor; + BB = Predecessor; } - if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " - << *ExitCount << "\n"); + if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount + << "\n"); return nullptr; } - Value *Count = 
SCEVE.expandCodeFor(ExitCount, CountType, - BB->getTerminator()); + Value *Count = SCEVE.expandCodeFor(TripCount, CountType, BB->getTerminator()); // FIXME: We've expanded Count where we hope to insert the counter setting // intrinsic. But, in the case of the 'test and set' form, we may fallback to diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll @@ -20,10 +20,8 @@ ; CHECK-NEXT: blt cr0, .LBB0_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: addi r6, r3, 5 -; CHECK-NEXT: addi r3, r4, -1 +; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: extsw r5, r5 -; CHECK-NEXT: clrldi r3, r3, 32 -; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 5 @@ -87,23 +85,21 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi r4, r4, -1 -; CHECK-NEXT: addi r3, r3, 1000 +; CHECK-NEXT: addi r6, r3, 1000 +; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: extsw r5, r5 -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: clrldi r4, r4, 32 -; CHECK-NEXT: addi r4, r4, 1 -; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lbzx r7, r3, r6 -; CHECK-NEXT: add r6, r6, r5 -; CHECK-NEXT: add r4, r7, r4 +; CHECK-NEXT: lbzx r7, r6, r4 +; CHECK-NEXT: add r4, r4, r5 +; CHECK-NEXT: add r3, r7, r3 ; CHECK-NEXT: bdnz .LBB1_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-NEXT: clrldi r3, r4, 56 +; CHECK-NEXT: clrldi r3, r3, 56 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: li r3, 0 diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll --- 
a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -263,9 +263,7 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB3_5 ; CHECK-NEXT: .LBB3_3: # %for.body.preheader -; CHECK-NEXT: addi r3, r4, -1 -; CHECK-NEXT: clrldi r3, r3, 32 -; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB3_4: # %for.body @@ -295,9 +293,7 @@ ; CHECK-BE-NEXT: cmpwi r4, 1 ; CHECK-BE-NEXT: blt cr0, .LBB3_5 ; CHECK-BE-NEXT: .LBB3_3: # %for.body.preheader -; CHECK-BE-NEXT: addi r3, r4, -1 -; CHECK-BE-NEXT: clrldi r3, r3, 32 -; CHECK-BE-NEXT: addi r3, r3, 1 +; CHECK-BE-NEXT: clrldi r3, r4, 32 ; CHECK-BE-NEXT: mtctr r3 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB3_4: # %for.body