Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -359,7 +359,10 @@ AliasSetId(AliasSetId), Expr(Expr) {} }; - RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {} + RuntimePointerChecking(ScalarEvolution *SE, + bool PartialRTCheckingAllowed = false) + : Need(false), SE(SE), + PartialRTChecking(PartialRTCheckingAllowed) {} /// Reset the state of the pointer runtime information. void reset() { @@ -471,6 +474,24 @@ return Pointers[PtrIdx]; } + /// \brief Return true if it is allowed to have an incomplete set of runtime + /// checks of pointers. 'incomplete set' means the set does not have checks + /// for all pointers because bounds of some pointers can be unknown. + bool isPartialCheckingAllowed() const { + return PartialRTChecking; + } + + /// \brief Notify RuntimePointerChecking that Ptr has unknown bounds. + void pointerWithUnknownBounds(Value *Ptr) { + assert(Ptr); + PtrsWithUnknownBounds.push_back(Ptr); + } + + /// \brief Get a list of pointers which have unknonw bounds. + const SmallVectorImpl &getPointersWithUnknownBounds() const { + return PtrsWithUnknownBounds; + } + private: /// \brief Groups pointers such that a single memcheck is required /// between two different groups. This will clear the CheckingGroups vector @@ -489,6 +510,15 @@ /// \brief Set of run-time checks required to establish independence of /// otherwise may-aliasing pointers in the loop. SmallVector Checks; + + /// \brief Set of pointers with unknown bounds. + SmallVector PtrsWithUnknownBounds; + + /// \brief This flag indicates if it is allowed to have incomplete set of + /// checks which means there might be no checks for pointers with unknown + /// memory bounds. An user of RuntimePointerChecking is responsible for + /// creating checks for such pointers. + bool PartialRTChecking; }; /// \brief Drive the analysis of memory accesses in the loop @@ -515,7 +545,8 @@ class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI); + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + bool PartialRTCheckingAllowed = false); // FIXME: // Hack for MSVC 2013 which sems like it can't synthesize this even @@ -742,7 +773,8 @@ /// \brief Query the result of the loop access information for the loop \p L. /// /// If there is no cached result available run the analysis. - const LoopAccessInfo &getInfo(Loop *L); + const LoopAccessInfo &getInfo(Loop *L, + bool PartialRTCheckingAllowed = false); void releaseMemory() override { // Invalidate the cache when the pass is freed. Index: include/llvm/Transforms/Utils/LoopVersioning.h =================================================================== --- include/llvm/Transforms/Utils/LoopVersioning.h +++ include/llvm/Transforms/Utils/LoopVersioning.h @@ -56,11 +56,17 @@ /// analyze L /// if versioning is necessary version L /// transform L - void versionLoop() { versionLoop(findDefsUsedOutsideOfLoop(VersionedLoop)); } + /// + /// \return BasicBlock which contains runtime checks. + BasicBlock *versionLoop() { + return versionLoop(findDefsUsedOutsideOfLoop(VersionedLoop)); + } /// \brief Same but if the client has already precomputed the set of values /// used outside the loop, this API will allows passing that. 
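 /// A minimal usage sketch of the returned block (illustrative only; the
 /// variable names ChecksBB and Builder are not from this patch, and
 /// LoopVersioningLICM below is the client that actually does this):
 ///
 ///   LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true);
 ///   BasicBlock *ChecksBB = LVer.versionLoop();
 ///   IRBuilder<> Builder(ChecksBB->getTerminator());
 ///   // ... emit extra bound checks and OR them into the condition of the
 ///   // conditional branch terminating ChecksBB ...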
- void versionLoop(const SmallVectorImpl &DefsUsedOutside); + /// + /// \return BasicBlock which contains runtime checks. + BasicBlock *versionLoop(const SmallVectorImpl &DefsUsedOutside); /// \brief Returns the versioned loop. Control flows here if pointers in the /// loop don't alias (i.e. all memchecks passed). (This loop is actually the Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -658,7 +658,11 @@ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); - CanDoRT = false; + if (RtCheck.isPartialCheckingAllowed()) { + RtCheck.pointerWithUnknownBounds(Ptr); + } else { + CanDoRT = false; + } } } @@ -1955,9 +1959,10 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, + bool PartialRTCheckingAllowed) : PSE(llvm::make_unique(*SE, *L)), - PtrRtChecking(llvm::make_unique(SE)), + PtrRtChecking(llvm::make_unique(SE, PartialRTCheckingAllowed)), DepChecker(llvm::make_unique(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -2005,11 +2010,14 @@ PSE->print(OS, Depth); } -const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { +const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L, + bool PartialRTCheckingAllowed) { auto &LAI = LoopAccessInfoMap[L]; - if (!LAI) - LAI = llvm::make_unique(L, SE, TLI, AA, DT, LI); + if (!LAI) { + LAI = llvm::make_unique(L, SE, TLI, AA, DT, LI, + PartialRTCheckingAllowed); + } return *LAI.get(); } Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -14,11 +14,10 @@ // // Loop Versioning will create a version of the loop with aggressive aliasing // assumptions in addition to the original with conservative (default) aliasing -// assumptions. The version of the loop making aggressive aliasing assumptions -// will have all the memory accesses marked as no-alias. These two versions of -// loop will be preceded by a memory runtime check. This runtime check consists -// of bound checks for all unique memory accessed in loop, and it ensures the -// lack of memory aliasing. The result of the runtime check determines which of +// assumptions. These two versions of loop will be preceded by a memory runtime +// check. This runtime check consists of bound checks for unique memory accessed +// in loop. It ensures the lack of memory aliasing among memory operations which +// can be optimized by LICM. The result of the runtime check determines which of // the loop versions is executed: If the runtime check detects any memory // aliasing, then the original loop is executed. Otherwise, the version with // aggressive aliasing assumptions is used. @@ -28,7 +27,8 @@ // a) Perform LoopVersioningLICM's feasibility check. // b) If loop is a candidate for versioning then create a memory bound check, // by considering all the memory accesses in loop body. -// c) Clone original loop and set all memory accesses as no-alias in new loop. +// c) Clone original loop and set memory accesses having runtime checks +// as no-alias in new loop. 
// d) Set original loop & versioned loop as a branch target of the runtime check // result. // @@ -144,6 +144,19 @@ } namespace { +/// \brief This structure keeps information about a pointer loaded from memory +// and increased in a constant value. +struct LoadedPointerInfo { + /// A load instruction which loads a pointer. + const LoadInst *Load; + + /// An increment instruction which increased the loaded pointer. + const GetElementPtrInst *IncExpr; + + LoadedPointerInfo(const LoadInst *Load, const GetElementPtrInst *Expr): + Load(Load), IncExpr(Expr) {} +}; + struct LoopVersioningLICM : public LoopPass { static char ID; @@ -158,39 +171,39 @@ AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); - AU.addRequired(); AU.addPreserved(); AU.addPreserved(); } LoopVersioningLICM() - : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr), - TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false), - Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr), + : LoopPass(ID), AA(nullptr), SE(nullptr), + LAA(nullptr), LAI(nullptr), + CurLoop(nullptr), LoopDepthThreshold(LVLoopDepthThreshold), InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0), - InvariantCounter(0), IsReadOnlyLoop(true) { + IsReadOnlyLoop(true) { initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); } + const char *getPassName() const override { return "Loop Versioning for LICM"; } +private: AliasAnalysis *AA; // Current AliasAnalysis information ScalarEvolution *SE; // Current ScalarEvolution - LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop. - TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. LoopAccessLegacyAnalysis *LAA; // Current LoopAccessAnalysis const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo - bool Changed; // Set to true when we change anything. - BasicBlock *Preheader; // The preheader block of the current loop. Loop *CurLoop; // The current loop we are working on. - AliasSetTracker *CurAST; // AliasSet information for the current loop. - ValueToValueMap Strides; + std::unique_ptr CurAST; // AliasSet information for the current loop. + + // A list of pointers loaded from memory in the current loop. + SmallVector LoadedPointers; + + // A list of pointers which are invariants of the current loop. + SmallPtrSet LoopInvPtrs; unsigned LoopDepthThreshold; // Maximum loop nest threshold float InvariantThreshold; // Minimum invariant threshold unsigned LoadAndStoreCounter; // Counter to track num of load & store - unsigned InvariantCounter; // Counter to track num of invariant bool IsReadOnlyLoop; // Read only loop marker. bool isLegalForVersioning(); @@ -198,9 +211,11 @@ bool legalLoopInstructions(); bool legalLoopMemoryAccesses(); bool isLoopAlreadyVisited(); - void setNoAliasToLoop(Loop *); + void annotateLoopWithNoAlias(LoopVersioning &LVer); bool instructionSafeForVersioning(Instruction *); - const char *getPassName() const override { return "Loop Versioning"; } + bool processPointersWithUnknownBounds(); + bool canLoadedPtrHaveBounds(const LoadInst *Load); + void addLoadedPointersRTChecks(BasicBlock *RuntimeCheckBB); }; } @@ -263,7 +278,6 @@ /// LoopVersioningLICM. bool LoopVersioningLICM::legalLoopMemoryAccesses() { bool HasMayAlias = false; - bool TypeSafety = false; bool HasMod = false; // Memory check: // Transform phase will generate a versioned loop and also a runtime check to @@ -286,23 +300,9 @@ // With MustAlias its not worth adding runtime bound check. 
     if (AS.isMustAlias())
       return false;
-    Value *SomePtr = AS.begin()->getValue();
-    bool TypeCheck = true;
     // Check for Mod & MayAlias
     HasMayAlias |= AS.isMayAlias();
     HasMod |= AS.isMod();
-    for (const auto &A : AS) {
-      Value *Ptr = A.getValue();
-      // Alias tracker should have pointers of same data type.
-      TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
-    }
-    // At least one alias tracker should have pointers of same data type.
-    TypeSafety |= TypeCheck;
-  }
-  // Ensure types should be of same type.
-  if (!TypeSafety) {
-    DEBUG(dbgs() << " Alias tracker type safety failed!\n");
-    return false;
   }
   // Ensure loop body shouldn't be read only.
   if (!HasMod) {
@@ -348,7 +348,7 @@
     Value *Ptr = Ld->getPointerOperand();
     // Check loop invariant.
     if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
-      InvariantCounter++;
+      LoopInvPtrs.insert(Ptr);
   }
   // If current instruction is store instruction
   // make sure it's a simple store (non atomic & non volatile)
@@ -362,19 +362,286 @@
     Value *Ptr = St->getPointerOperand();
     // Check loop invariant.
     if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
-      InvariantCounter++;
+      LoopInvPtrs.insert(Ptr);
     IsReadOnlyLoop = false;
   }
   return true;
 }
+/// \brief Check that all users of V other than U are memory operations.
+static bool areOtherUsersOnlyMemOps(const Value *V, const User *U) {
+  assert(V);
+  assert(U);
+  assert(!V->hasOneUse());
+  for (const User *OU : V->users()) {
+    if (const Instruction *I = dyn_cast<Instruction>(OU)) {
+      if (!I->mayReadOrWriteMemory() && (I != U))
+        return false;
+    } else
+      return false;
+  }
+  return true;
+}
+
+/// \brief Check if a pointer loaded by Load can have bounds.
+///
+/// Requirements:
+/// 1. Load is simple.
+/// 2. Load's pointer operand is a loop invariant.
+/// 3. The loaded pointer is incremented by a GetElementPtrInst.
+/// 4. Other users of the loaded pointer besides that GetElementPtrInst,
+///    if any, are only memory instructions.
+/// 5. The GEP has one constant index.
+/// 6. The GEP has one user.
+/// 7. That user is a StoreInst.
+/// 8. The store is simple.
+/// 9. The store's pointer operand is the same as Load's pointer operand.
+///
+/// \param Load An instruction loading a pointer.
+/// \return true if the loaded pointer can have bounds.
+bool LoopVersioningLICM::canLoadedPtrHaveBounds(const LoadInst *Load) {
+  assert(Load);
+
+  DEBUG(dbgs() << " Checking a pointer loaded by: " << *Load << "\n");
+
+  if (!Load->isSimple()) {
+    DEBUG(dbgs() << " Failed: load is not simple.\n");
+    return false;
+  }
+
+  const Value *LoadAddr = Load->getPointerOperand();
+  if (!CurLoop->isLoopInvariant(LoadAddr)) {
+    DEBUG(dbgs() << " Failed: load pointer operand is not a loop invariant.\n");
+    return false;
+  }
+
+  const GetElementPtrInst *GEP = nullptr;
+  for (const User *U : Load->users()) {
+    GEP = dyn_cast<GetElementPtrInst>(U);
+    if (GEP)
+      break;
+  }
+  if (!GEP) {
+    DEBUG(dbgs() << " Failed: no GetElementPtrInst using the loaded pointer"
+                 << " was found.\n");
+    return false;
+  }
+
+  if (!Load->hasOneUse() && !areOtherUsersOnlyMemOps(Load, GEP)) {
+    DEBUG(dbgs() << " Failed: a non-memory operation other than GEP uses Load.\n");
+    return false;
+  }
+
+  if (GEP->getNumIndices() != 1) {
+    DEBUG(dbgs() << " Failed: GEP does not have exactly one index.\n");
+    return false;
+  }
+
+  if (!GEP->hasAllConstantIndices()) {
+    DEBUG(dbgs() << " Failed: GEP has a non-constant index.\n");
+    return false;
+  }
+
+  if (!GEP->hasOneUse()) {
+    DEBUG(dbgs() << " Failed: GEP has more than one user.\n");
+    return false;
+  }
+
+  const StoreInst *Store = dyn_cast<StoreInst>(*GEP->user_begin());
+  if (!Store) {
+    DEBUG(dbgs() << " Failed: GEP user is not a StoreInst.\n");
+    return false;
+  }
+
+  if (!Store->isSimple()) {
+    DEBUG(dbgs() << " Failed: store is not simple.\n");
+    return false;
+  }
+
+  const Value *StoreAddr = Store->getPointerOperand();
+  if (StoreAddr != LoadAddr) {
+    DEBUG(dbgs() << " Failed: store address is not the same as load address.\n");
+    return false;
+  }
+
+  LoadedPointers.push_back(LoadedPointerInfo(Load, GEP));
+
+  return true;
+}
+
+/// \brief Process pointers with unknown bounds.
+///
+/// \return true if all such pointers are loaded pointers and can have bounds.
+bool LoopVersioningLICM::processPointersWithUnknownBounds() {
+  assert(CurLoop);
+  assert(LAI);
+  assert(LAI->getRuntimePointerChecking());
+
+  const SmallVectorImpl<Value *> &PtrsWithUnknBnds =
+      LAI->getRuntimePointerChecking()->getPointersWithUnknownBounds();
+  if (PtrsWithUnknBnds.empty())
+    return true;
+
+  for (auto Ptr : PtrsWithUnknBnds) {
+    const LoadInst *Load = dyn_cast<LoadInst>(Ptr);
+    if (!Load) {
+      DEBUG(dbgs() << " It is not a loaded pointer: " << *Ptr << "\n");
+      return false;
+    }
+
+    if (!canLoadedPtrHaveBounds(Load))
+      return false;
+  }
+
+  return true;
+}
+
+/// \brief Add runtime checks for loaded pointers.
+///
+/// After loop versioning there are RT checks for invariant pointers and for
+/// pointers with known bounds. The remaining pointers are defined and used as
+/// follows:
+///
+///   Ptr = Load(InvPtr)
+///   NewPtr = GEP(Ptr, Const)
+///   Store(NewPtr, InvPtr)
+///   Mem_operations using Ptr
+///
+/// In addition to the RT checks already created, InvPtr needs to be checked
+/// against pointers loaded as in the case above. If a loaded pointer does not
+/// alias the invariant pointers at some iteration, then at the next iteration
+/// its value is the value produced by the GEP instruction.
+/// So, without aliasing, Ptr takes the values:
+///
+///   [Ptr0, Ptr0 + (number_of_iterations-1) * type_size * GEP_index], where
+///   Ptr0 is Load(InvPtr) at the first iteration
+///
+/// We need to guarantee that Ptr0 never reaches InvPtr:
+///   4_or_8_aligned(Ptr0) != InvPtr                         : iteration 1
+///   4_or_8_aligned(Ptr0 + type_size*GEP_index) != InvPtr   : iteration 2
+///   4_or_8_aligned(Ptr0 + 2*type_size*GEP_index) != InvPtr : iteration 3
+///   ...
+/// +/// Aligned Ptr0 is used because InvPtr is a pointer to a pointer and +/// it's aligned either 4 or 8 bytes. +/// +/// We can do more strict checking: if InvPtr is not in +/// [4_or_8_aligned(Ptr0), Ptr0 + (number_of_iterations-1) * type_size * GEP_index] +/// +/// 'Ptr0 + (number_of_iterations-1) * type_size * GEP_index' is used instead of +/// the aligned version because +/// 4_or_8_aligned(Ptr0 + (number_of_iterations-1) * type_size * GEP_index) <= +/// Ptr0 + (number_of_iterations-1) * type_size * GEP_index +/// +/// \param RuntimeCheckBB BasicBlock where LoopVersioning has added checks. +void LoopVersioningLICM::addLoadedPointersRTChecks(BasicBlock *RuntimeCheckBB) { + assert(RuntimeCheckBB); + assert(!LoopInvPtrs.empty()); + assert(RuntimeCheckBB->getTerminator()); + assert(isa(RuntimeCheckBB->getTerminator())); + + if (LoadedPointers.empty()) + return; + + const DataLayout &DL = RuntimeCheckBB->getModule()->getDataLayout(); + + IRBuilder<> ChkBuilder(RuntimeCheckBB->getTerminator()); + + // Get a branch condition which will be OR with additional checking results. + Value *BranchCond = cast(RuntimeCheckBB->getTerminator()) + ->getCondition(); + + const SCEV *CurLoopBackedgeTakenCount = SE->getBackedgeTakenCount(CurLoop); + assert(CurLoopBackedgeTakenCount); + + // Insert calculation of BackedgeTakenCount into the basic block: + // %0 = add i32 %num_iter, -1 + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "ind"); + Value *BackedgeTakenCountI32 = Exp.expandCodeFor(CurLoopBackedgeTakenCount, + CurLoopBackedgeTakenCount->getType(), RuntimeCheckBB->getTerminator()); + Value *BackedgeTakenCountI64 = nullptr; + if (BackedgeTakenCountI32->getType()->isIntegerTy(64)) { + BackedgeTakenCountI64 = BackedgeTakenCountI32; + BackedgeTakenCountI32 = nullptr; + } else { + BackedgeTakenCountI64 = ChkBuilder.CreateZExt(BackedgeTakenCountI32, + Type::getInt64Ty(RuntimeCheckBB->getContext())); + } + + for (auto &LoadedPointer: LoadedPointers) { + // Create a copy of an instruction loading a pointer and put it into + // the RT check basic block. This is Ptr0. 
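    // Taken together, for one (loaded pointer, invariant pointer) pair the
    // code below emits roughly the following IR into RuntimeCheckBB (value
    // names are illustrative; 32-bit pointers and a unit GEP step are assumed,
    // as in the tests added by this patch):
    //   %ptr0   = load i8*, i8** %src
    //   %pti    = ptrtoint i8* %ptr0 to i32
    //   %pa     = and i32 %pti, -4
    //   %p0s    = inttoptr i32 %pa to i8*
    //   %idx    = mul i32 %backedge.taken.count, 1
    //   %p0e    = getelementptr i8, i8* %ptr0, i32 %idx
    //   %invp   = bitcast i8** %src to i8*
    //   %bound0 = icmp ule i8* %p0s, %invp
    //   %bound1 = icmp ule i8* %invp, %p0e
    //   %found.conflict = and i1 %bound0, %bound1
    //   %conflict.rdx   = or i1 %memcheck.conflict, %found.conflict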
+ Instruction *Load = LoadedPointer.Load->clone(); + Load->setName("ptr0"); + RuntimeCheckBB->getInstList().insert(RuntimeCheckBB->getTerminator()->getIterator(), + Load); + + // Create 4_or_8_aligned(Ptr0): + // %pti = ptrtoint i8* %ptr0 to i32 + // %pa = and i32 %pti, -4 + // %p0s = inttoptr i32 %pa to i8* + Type *LoadedPtrArithTy = Type::getInt8PtrTy(RuntimeCheckBB->getContext(), + Load->getType()->getPointerAddressSpace()); + Value *LoadedPtrStart = ChkBuilder.CreatePtrToInt(Load, + DL.getIntPtrType(Load->getType()), "pti"); + const uint64_t AligmentMask = ~(DL.getPointerTypeSize(Load->getType()) - 1); + LoadedPtrStart = ChkBuilder.CreateAnd(LoadedPtrStart, AligmentMask, "pa"); + LoadedPtrStart = ChkBuilder.CreateIntToPtr(LoadedPtrStart, LoadedPtrArithTy, + "p0s"); + + // Create Ptr0 + (number_of_iterations-1) * type_size * GEP_index + // %incdec.ptr.p0e = getelementptr i8, i8* %ptr0, i32 %0 + Value *LHS = BackedgeTakenCountI64; + Value *RHS = LoadedPointer.IncExpr->getOperand(1); + assert(RHS->getType()->isIntegerTy(32) || RHS->getType()->isIntegerTy(64)); + if (RHS->getType()->isIntegerTy(32)) { + if (BackedgeTakenCountI32) { + LHS = BackedgeTakenCountI32; + } else { + RHS = ChkBuilder.CreateZExt(RHS, + Type::getInt64Ty(RuntimeCheckBB->getContext())); + } + } + Value *Idx = ChkBuilder.CreateMul(LHS, RHS, "idx"); + Value *IdxList[1] = {Idx}; + Value *GEP = ChkBuilder.CreateGEP(Load, IdxList, + LoadedPointer.IncExpr->getName() + ".p0e"); + assert(Load->getType()->getPointerAddressSpace() + == GEP->getType()->getPointerAddressSpace()); + Value *LoadedPtrEnd = ChkBuilder.CreateBitCast(GEP, LoadedPtrArithTy, "p0e"); + + // Create check for all invariant pointers. + for (auto PtrInvariant: LoopInvPtrs) { + // %invp = bitcast i8** %src to i8* + Type *PtrInvariantArithTy = Type::getInt8PtrTy(RuntimeCheckBB->getContext(), + PtrInvariant->getType()->getPointerAddressSpace()); + Value *PtrInvariantBC = ChkBuilder.CreateBitCast(PtrInvariant, + PtrInvariantArithTy, "invp"); + + // 4_or_8_aligned(Ptr0) <= InvPtr + // %bound02 = icmp ule i8* %p0s, %invp + Value *Cmp0 = ChkBuilder.CreateICmpULE(LoadedPtrStart, PtrInvariantBC, + "bound0"); + + // InvPtr <= Ptr0 + (number_of_iterations-1) * type_size * GEP_index + // %bound13 = icmp ule i8* %invp, %incdec.ptr.p0e + Value *Cmp1 = ChkBuilder.CreateICmpULE(PtrInvariantBC, LoadedPtrEnd, + "bound1"); + + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + + // Update the branch condition + BranchCond = ChkBuilder.CreateOr(BranchCond, IsConflict, "conflict.rdx"); + } + } + cast(RuntimeCheckBB->getTerminator())->setCondition(BranchCond); +} + /// \brief Check loop instructions and confirms it's good for /// LoopVersioningLICM. bool LoopVersioningLICM::legalLoopInstructions() { // Resetting counters. LoadAndStoreCounter = 0; - InvariantCounter = 0; IsReadOnlyLoop = true; // Iterate over loop blocks and instructions of each block and check // instruction safety. @@ -384,8 +651,9 @@ if (!instructionSafeForVersioning(&Inst)) return false; } - // Get LoopAccessInfo from current loop. - LAI = &LAA->getInfo(CurLoop); + // Get LoopAccessInfo from current loop. + const bool PartialRTCheckingAllowed = true; + LAI = &LAA->getInfo(CurLoop, PartialRTCheckingAllowed); // Check LoopAccessInfo for need of runtime check. if (LAI->getRuntimePointerChecking()->getChecks().empty()) { DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); @@ -398,6 +666,7 @@ return false; } // Loop should have at least one invariant load or store instruction. 
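  // (For example, in the tests added below %src is a loop-invariant address
  //  that is both loaded from and stored to on every iteration, so %src ends
  //  up in LoopInvPtrs and satisfies this requirement.)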
+ const SmallVectorImpl::size_type InvariantCounter = LoopInvPtrs.size(); if (!InvariantCounter) { DEBUG(dbgs() << " Invariant not found !!\n"); return false; @@ -418,6 +687,11 @@ << InvariantThreshold << "%\n"); return false; } + + if (!processPointersWithUnknownBounds()) { + return false; + } + return true; } @@ -467,39 +741,56 @@ return true; } -/// \brief Update loop with aggressive aliasing assumptions. -/// It marks no-alias to any pairs of memory operations by assuming -/// loop should not have any must-alias memory accesses pairs. -/// During LoopVersioningLICM legality we ignore loops having must -/// aliasing memory accesses. -void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { - // Get latch terminator instruction. - Instruction *I = VerLoop->getLoopLatch()->getTerminator(); - // Create alias scope domain. - MDBuilder MDB(I->getContext()); - MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); - StringRef Name = "LVAliasScope"; - SmallVector Scopes, NoAliases; - MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - // Iterate over each instruction of loop. - // set no-alias for all load & store instructions. - for (auto *Block : CurLoop->getBlocks()) { - for (auto &Inst : *Block) { - // Only interested in instruction that may modify or read memory. - if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) - continue; - Scopes.push_back(NewScope); - NoAliases.push_back(NewScope); - // Set no-alias for current instruction. - Inst.setMetadata( - LLVMContext::MD_noalias, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), - MDNode::get(Inst.getContext(), NoAliases))); - // set alias-scope for current instruction. - Inst.setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Inst.getContext(), Scopes))); +/// \brief Annotate loop memory operations with aliasing information. +/// +/// 1. Annotate operations for which LoopVersioning has created checks. +/// 2. Annotate operations for which we have created checks. +void LoopVersioningLICM::annotateLoopWithNoAlias(LoopVersioning &LVer) { + // Annotate operations for which LoopVersioning has created checks. + LVer.annotateLoopWithNoAlias(); + + if (LoadedPointers.empty()) + return; + + // Annotate operations for which we have created checks. + + // Get memory operations which read from/write to invariant pointers. + SmallPtrSet InvPtrUsers; + for (auto PtrInv: LoopInvPtrs) { + for (User *U: PtrInv->users()) { + if (Instruction *I = dyn_cast(U)) { + if (I->mayReadOrWriteMemory() && CurLoop->contains(I)) { + Value *Ptr = nullptr; + if (LoadInst *L = dyn_cast(I)) { + Ptr = L->getPointerOperand(); + } else if (StoreInst *S = dyn_cast(I)) { + Ptr = S->getPointerOperand(); + } + if (Ptr == PtrInv) + InvPtrUsers.insert(I); + } + } + } + } + + // Now set that users of invariant pointers don't alias with users of + // loaded pointers. 
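  // The intended shape of the result (scope names illustrative) is:
  //   %c = load i8, i8* %ptr, ...        !alias.scope !{!LVerLICMScope}
  //   store i8* %inc, i8** %src, ...     !noalias !{!LVerLICMScope}
  // i.e. accesses through the loaded pointer carry the scope, and accesses
  // through the invariant pointer are declared not to alias that scope, which
  // is what later allows LICM to move the invariant-address accesses.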
+ LLVMContext &Context = CurLoop->getHeader()->getContext(); + MDBuilder MDB(Context); + MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerLICMDomain"); + for (auto &LoadedPtr: LoadedPointers) { + MDNode *MDN = MDB.createAnonymousAliasScope(Domain); + for (auto &U: LoadedPtr.Load->uses()) { + Instruction *I = dyn_cast(U.getUser()); + if (I && I->mayReadOrWriteMemory()) { + I->setMetadata(LLVMContext::MD_alias_scope, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Context, MDN))); + } + } + for (auto I: InvPtrUsers) { + I->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias), MDN)); } } } @@ -507,27 +798,26 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) return false; - Changed = false; // Get Analysis information. - LI = &getAnalysis().getLoopInfo(); AA = &getAnalysis().getAAResults(); SE = &getAnalysis().getSE(); - DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(); LAA = &getAnalysis(); LAI = nullptr; // Set Current Loop CurLoop = L; - // Get the preheader block. - Preheader = L->getLoopPreheader(); - // Initial allocation - CurAST = new AliasSetTracker(*AA); + LoadedPointers.clear(); + LoopInvPtrs.clear(); + CurAST.reset(new AliasSetTracker(*AA)); + LoopInfo *LI = &getAnalysis().getLoopInfo(); // Loop over the body of this loop, construct AST. for (auto *Block : L->getBlocks()) { if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop. CurAST->add(*Block); // Incorporate the specified basic block } + + bool Changed = false; + // Check feasiblity of LoopVersioningLICM. // If versioning found to be feasible and beneficial then proceed // else simply return, by cleaning up memory. @@ -535,21 +825,23 @@ // Do loop versioning. // Create memcheck for memory accessed inside loop. // Clone original loop, and set blocks properly. + DominatorTree *DT = &getAnalysis().getDomTree(); LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true); - LVer.versionLoop(); + BasicBlock *RTCheckingBB = LVer.versionLoop(); + addLoadedPointersRTChecks(RTCheckingBB); // Set Loop Versioning metaData for original loop. addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData); // Set Loop Versioning metaData for version loop. addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData); - // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. - addStringMetadataToLoop(LVer.getVersionedLoop(), - "llvm.mem.parallel_loop_access"); - // Update version loop with aggressive aliasing assumption. - setNoAliasToLoop(LVer.getVersionedLoop()); + + if (LoadedPointers.empty()) { + // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. + addStringMetadataToLoop(LVer.getVersionedLoop(), + "llvm.mem.parallel_loop_access"); + } + annotateLoopWithNoAlias(LVer); Changed = true; } - // Delete allocated memory. - delete CurAST; return Changed; } Index: lib/Transforms/Utils/LoopVersioning.cpp =================================================================== --- lib/Transforms/Utils/LoopVersioning.cpp +++ lib/Transforms/Utils/LoopVersioning.cpp @@ -52,7 +52,7 @@ Preds = std::move(Check); } -void LoopVersioning::versionLoop( +BasicBlock* LoopVersioning::versionLoop( const SmallVectorImpl &DefsUsedOutside) { Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; @@ -119,6 +119,7 @@ // Adds the necessary PHI nodes for the versioned loops based on the // loop-defined values used outside of the loop. 
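  // (At this point RuntimeCheckBB ends in the conditional branch that selects
  //  between the versioned and the non-versioned loop; returning it lets
  //  callers such as LoopVersioningLICM append further checks to its branch
  //  condition.)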
addPHINodes(DefsUsedOutside); + return RuntimeCheckBB; } void LoopVersioning::addPHINodes( Index: test/Transforms/LoopVersioningLICM/copying-bytes-loop-01.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVersioningLICM/copying-bytes-loop-01.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -S -loop-versioning-licm -scoped-noalias -licm 2>&1 | FileCheck %s + +; CHECK: while.body.lver.check:{{.*}} +; CHECK: %ptr0 = load i8{{.*}} +; CHECK: {{.*}}p0e = getelementptr i8{{.*}} +; CHECK: {{.*}}= icmp ule i8*{{.*}} +; CHECK: {{.*}}= icmp ule i8*{{.*}} +; CHECK: while.body.ph.lver.orig:{{.*}} +; CHECK: while.body.lver.orig:{{.*}} +; CHECK: while.body.ph:{{.*}} +; CHECK-NEXT: {{.*}}= load i8*{{.*}} +; CHECK: while.body:{{.*}} +; CHECK: while.end.loopexit.loopexit:{{.*}} +; CHECK: store i8*{{.*}} + +define void @g(i8* nocapture %dst, i8** nocapture %src) nounwind { +entry: + %call = tail call i32 @f() + %tobool3 = icmp eq i32 %call, 0 + br i1 %tobool3, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %bytes_to_read.05 = phi i32 [ %dec, %while.body ], [ %call, %while.body.preheader ] + %dst.addr.04 = phi i8* [ %incdec.ptr1, %while.body ], [ %dst, %while.body.preheader ] + %dec = add nsw i32 %bytes_to_read.05, -1 + %0 = load i8*, i8** %src, align 4 + %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 1 + store i8* %incdec.ptr, i8** %src, align 4 + %1 = load i8, i8* %0, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %dst.addr.04, i32 1 + store i8 %1, i8* %dst.addr.04, align 1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + ret void +} + +declare i32 @f() Index: test/Transforms/LoopVersioningLICM/copying-bytes-loop-02.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVersioningLICM/copying-bytes-loop-02.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -S -loop-versioning-licm -scoped-noalias -licm 2>&1 | FileCheck %s + +; CHECK: while.body.lver.check:{{.*}} +; CHECK: %ptr0 = load i8{{.*}} +; CHECK: {{.*}}p0e = getelementptr i8{{.*}} +; CHECK: {{.*}}= icmp ule i8*{{.*}} +; CHECK: {{.*}}= icmp ule i8*{{.*}} +; CHECK: while.body.ph.lver.orig:{{.*}} +; CHECK: while.body.lver.orig:{{.*}} +; CHECK: while.body.ph:{{.*}} +; CHECK-NEXT: {{.*}}= load i8*{{.*}} +; CHECK: while.body:{{.*}} +; CHECK: while.end.loopexit.loopexit:{{.*}} +; CHECK: store i8*{{.*}} + +%struct.S = type { i8*, i8*, i8* } + +define void @g(%struct.S* nocapture %buf, i8* nocapture readonly %src) nounwind { +entry: + %call = tail call i32 @f() + %tobool3 = icmp eq i32 %call, 0 + br i1 %tobool3, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + %pos = getelementptr inbounds %struct.S, %struct.S* %buf, i32 0, i32 1 + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %bytes_to_read.05 = phi i32 [ %call, %while.body.lr.ph ], [ %dec, %while.body ] + %src.addr.04 = phi i8* [ %src, %while.body.lr.ph ], [ %incdec.ptr, %while.body ] + %dec = add nsw i32 %bytes_to_read.05, -1 + %incdec.ptr = getelementptr inbounds i8, i8* %src.addr.04, i32 1 + %0 = load i8, i8* %src.addr.04, align 1 + %1 = load i8*, i8** %pos, align 4 + %incdec.ptr1 = getelementptr inbounds i8, i8* %1, i32 1 + store i8* %incdec.ptr1, i8** 
%pos, align 4 + store i8 %0, i8* %1, align 1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + ret void +} + +declare i32 @f() + Index: test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -12,11 +12,11 @@ ; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ] ; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64 ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom -; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !{{[0-9]+}} ; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add ; CHECK-NEXT: %inc = add nuw i32 %j.113, 1 ; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr -; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !5 +; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !{{[0-9]+}} define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { entry: %cmp14 = icmp eq i32 %itr, 0 Index: test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -9,7 +9,7 @@ ; ; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us ; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ] -; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !0, !noalias !0 +; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !{{[0-9]+}} ; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us ; define i32 @foo(float* nocapture %var2, float** nocapture readonly %var3, i32 %itr) #0 {
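For reference, the client-side contract introduced by this patch, stitched
together in one place (a sketch only; emitClientChecks is a placeholder for
what LoopVersioningLICM::addLoadedPointersRTChecks does, the element type of
the returned vector is assumed to be Value *, and L/LI/DT/SE/LAA are as in
runOnLoop above):

  // Ask LAA to keep its result even when some pointers have unknown bounds.
  const bool PartialRTCheckingAllowed = true;
  const LoopAccessInfo &LAI = LAA->getInfo(L, PartialRTCheckingAllowed);

  // Version the loop with the checks LAA could build; keep the check block.
  LoopVersioning LVer(LAI, L, LI, DT, SE, true);
  BasicBlock *ChecksBB = LVer.versionLoop();

  // The client is responsible for covering the remaining pointers itself.
  const RuntimePointerChecking *RtChecking = LAI.getRuntimePointerChecking();
  for (Value *Ptr : RtChecking->getPointersWithUnknownBounds())
    emitClientChecks(Ptr, ChecksBB);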