// Review preamble (everything before the first "diff --git" header lies outside
// every hunk).
//
// Purpose: let PPCLoopInstrFormPrep prepare base pointers whose step
// (BasePtrSCEV->getStepRecurrence) is not a SCEVConstant.
//  * Adds PPCLoopInstrFormPrep::getPreparedIncNode(). For a constant step it
//    returns the constant's value. Otherwise it requires a loop predecessor and
//    a loop latch, scans the PHIs of MemI's parent block for a two-input PHI
//    fed by exactly those two blocks whose add-recurrence step equals the
//    requested step, and returns the operand of an `add` user of that PHI whose
//    SCEV equals the step (operand 0 is tried before operand 1). It returns
//    nullptr when nothing matches.
//  * The caller gives up when no increment value is found, and also when the
//    form is UpdateForm with a non-constant step. DS-form pre-increment is only
//    considered, and alreadyPrepared() only consulted, for a constant step.
//  * The GEP built on the non-pre-increment path now takes IncNode instead of
//    the constant's value.
//  * Test loop-instr-prep-non-const-increasement.ll: the FIXME is dropped and
//    the loop body is now expected to contain `ld r4, 0(r6)` / `ld r4, 4(r6)`
//    in place of the two `ldx` loads.
//
// Change made in this revision: the two newly added LLVM_DEBUG messages now end
// in "\n", like the existing "PIP: New start is: ..." message in the same
// function, so later debug output does not continue on the same line.
//
// NOTE(review): this copy looks extraction-damaged -- line breaks are collapsed
//   and template argument lists appear to be missing (`dyn_cast(`, `isa(`,
//   `iterator_range PHIIter`, `SmallVector collectCandidates(`). Regenerate the
//   patch from version control before applying it.
// NOTE(review): several hunks show identical removed and added text (the ST
//   member, `return "";`, `SuccPrepCount++;`, two closing braces, one comment
//   line) -- presumably whitespace-only churn; confirm and drop if unintended.
// NOTE(review): getPreparedIncNode does not check that the other operand of the
//   matched `add` is the PHI itself, nor that the returned operand is available
//   at the GEP insertion point (the terminator of the non-preheader
//   predecessor) -- verify both before landing.
// NOTE(review): in getPreparedIncNode the dyn_cast on elements of BB->phis()
//   and the null check on the getStepRecurrence() result look redundant --
//   TODO confirm.
// NOTE(review): the debug strings misspell "increment" ("Increasement",
//   "incresement"); the spelling is left untouched here.
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -169,7 +169,7 @@ private: PPCTargetMachine *TM = nullptr; - const PPCSubtarget *ST; + const PPCSubtarget *ST; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; @@ -189,6 +189,10 @@ const SCEVConstant *BasePtrIncSCEV, InstrForm Form); + /// Get the value which defines the increment SCEV \p BasePtrIncSCEV. + Value *getPreparedIncNode(Loop *L, Instruction *MemI, + const SCEV *BasePtrIncSCEV); + /// Collect condition matched(\p isValidCandidate() returns true) /// candidates in Loop \p L. SmallVector collectCandidates( @@ -266,7 +270,7 @@ if (I->hasName()) return (I->getName() + Suffix).str(); else - return ""; + return ""; } static Value *GetPointerOperand(Value *MemI) { @@ -404,7 +408,7 @@ // contains following load/stores with different remainders: // 1: 10 load/store whose remainder is 1; // 2: 9 load/store whose remainder is 2; - // 3: 1 for remainder 3 and 0 for remainder 0; + // 3: 1 for remainder 3 and 0 for remainder 0; // Now we will choose the first load/store whose remainder is 1 as base and // adjust all other load/stores according to new base, so we will get 10 DS // form and 10 X form. @@ -515,27 +519,47 @@ if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L)) return MadeChange; - const SCEVConstant *BasePtrIncSCEV = - dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); - if (!BasePtrIncSCEV) + bool IsConstantInc = false; + const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE); + Value *IncNode = getPreparedIncNode(L, MemI, BasePtrIncSCEV); + + const SCEVConstant *BasePtrIncConstantSCEV = + dyn_cast(BasePtrIncSCEV); + if (BasePtrIncConstantSCEV) + IsConstantInc = true; + + // No valid representation for the increment. 
+ if (!IncNode) { + LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n"); + return MadeChange; + } + + // Now we only handle update form for constant increment. + // FIXME: add support for non-constant increment UpdateForm. + if (!IsConstantInc && Form == UpdateForm) { + LLVM_DEBUG(dbgs() << "not a constant incresement for update form!\n"); return MadeChange; + } // For some DS form load/store instructions, it can also be an update form, // if the stride is a multipler of 4. Use update form if prefer it. - bool CanPreInc = (Form == UpdateForm || - ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) && - PreferUpdateForm)); + bool CanPreInc = + (Form == UpdateForm || + ((Form == DSForm) && IsConstantInc && + !BasePtrIncConstantSCEV->getAPInt().urem(4) && PreferUpdateForm)); const SCEV *BasePtrStartSCEV = nullptr; if (CanPreInc) BasePtrStartSCEV = - SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV); + SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncConstantSCEV); else BasePtrStartSCEV = BasePtrSCEV->getStart(); if (!isSafeToExpand(BasePtrStartSCEV, *SE)) return MadeChange; - if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) + // FIXME: check already prepared PHI for non constant increment. 
+ if (IsConstantInc && + alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncConstantSCEV, Form)) return MadeChange; LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); @@ -565,9 +589,11 @@ Instruction *PtrInc = nullptr; Instruction *NewBasePtr = nullptr; if (CanPreInc) { + assert(BasePtrIncConstantSCEV && + "update form now only supports constant increment."); Instruction *InsPoint = &*Header->getFirstInsertionPt(); PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), + I8Ty, NewPHI, BasePtrIncConstantSCEV->getValue(), getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); cast(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); for (auto PI : predecessors(Header)) { @@ -594,9 +620,8 @@ BasicBlock *BB = PI; Instruction *InsPoint = BB->getTerminator(); PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), - getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); - + I8Ty, NewPHI, IncNode, getInstrName(MemI, GEPNodeIncNameSuffix), + InsPoint); cast(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); NewPHI->addIncoming(PtrInc, PI); @@ -673,7 +698,7 @@ MadeChange = true; - SuccPrepCount++; + SuccPrepCount++; if (Form == DSForm && !CanPreInc) DSFormChainRewritten++; @@ -726,6 +751,62 @@ return MadeChange; } +Value *PPCLoopInstrFormPrep::getPreparedIncNode(Loop *L, Instruction *MemI, + const SCEV *BasePtrIncSCEV) { + if (isa(BasePtrIncSCEV)) + return cast(BasePtrIncSCEV)->getValue(); + + BasicBlock *BB = MemI->getParent(); + if (!BB) + return nullptr; + + BasicBlock *PredBB = L->getLoopPredecessor(); + BasicBlock *LatchBB = L->getLoopLatch(); + + if (!PredBB || !LatchBB) + return nullptr; + + // Run through the PHIs and check their add users to find valid representation + // for the increment SCEV. 
+ iterator_range PHIIter = BB->phis(); + for (auto &CurrentPHI : PHIIter) { + PHINode *CurrentPHINode = dyn_cast(&CurrentPHI); + if (!CurrentPHINode) + continue; + + if (!SE->isSCEVable(CurrentPHINode->getType())) + continue; + + const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); + + const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast(PHISCEV); + if (!PHIBasePtrSCEV) + continue; + + const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE); + if (!PHIBasePtrIncSCEV) + continue; + + if (CurrentPHINode->getNumIncomingValues() == 2) { + if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && + CurrentPHINode->getIncomingBlock(1) == PredBB) || + (CurrentPHINode->getIncomingBlock(1) == LatchBB && + CurrentPHINode->getIncomingBlock(0) == PredBB)) { + if (PHIBasePtrIncSCEV == BasePtrIncSCEV) + for (User *User : CurrentPHINode->users()) + if (Instruction *I = dyn_cast(User)) + if (I->getOpcode() == Instruction::Add) { + if (SE->getSCEVAtScope(I->getOperand(0), L) == BasePtrIncSCEV) + return I->getOperand(0); + if (SE->getSCEVAtScope(I->getOperand(1), L) == BasePtrIncSCEV) + return I->getOperand(1); + } + } + } + } + return nullptr; +} + // In order to prepare for the preferred instruction form, a PHI is added. 
 // This function will check to see if that PHI already exists and will return // true if it found an existing PHI with the matched start and increment as the @@ -777,7 +858,7 @@ PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) { ++PHINodeAlreadyExistsUpdate; return true; - } + } if (Form == DSForm || Form == DQForm) { const SCEVConstant *Diff = dyn_cast( SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV)); @@ -788,7 +869,7 @@ ++PHINodeAlreadyExistsDQ; return true; } - } + } } } } diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll @@ -2,9 +2,6 @@ ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s -; FIXME: PPCLoopInstrFormPrep should be able to common base for "(unsigned long long *)(p + j + 5)" -; and "(unsigned long long *)(p + j + 9)", thus we only have two DS form load inside the loop. 
- ; long long foo(char *p, int n, int count) { ; int j = 0; ; long long sum = 0; @@ -22,29 +19,24 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB0_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r6, r3, 5 +; CHECK-NEXT: clrldi r3, r4, 32 ; CHECK-NEXT: extsw r5, r5 -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: li r7, 5 -; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: li r8, 9 -; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: add r9, r3, r6 +; CHECK-NEXT: ld r4, 0(r6) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: ld r4, 4(r6) ; CHECK-NEXT: add r6, r6, r5 -; CHECK-NEXT: ldx r10, r9, r7 -; CHECK-NEXT: ldx r9, r9, r8 -; CHECK-NEXT: add r4, r10, r4 -; CHECK-NEXT: add r4, r4, r9 +; CHECK-NEXT: add r3, r3, r4 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: blr entry: %cmp16 = icmp sgt i32 %n, 0