Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,10 @@ static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); + +static bool pointerInvalidatedByLoop(LoadInst *LI, Loop *CurLoop, + AliasAnalysis *AA); + static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, @@ -638,7 +642,8 @@ LI->getAAMetadata(AAInfo); bool Invalidated = - pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST) && + pointerInvalidatedByLoop(LI, CurLoop, AA); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1587,6 +1592,56 @@ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } +// Default value of zero implies we use the regular alias set mechanism +// instead of the cross product using AA. +static cl::opt +LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), + cl::desc("How many instruction to cross product using AA")); + +// The alias set mechanism used by LICM has a major weakness in that +// it combines all things which may alias into a single set *before* asking +// modref questions. As a result, a single readonly call within a loop +// will collapse all loads and stores into a single alias set and report +// invalidation if the loop contains any store. For example, readonly calls with +// deopt states have this form and create a general alias set with all loads and +// stores. +// In order to get any LICM in loops containing possible deopt states we need a +// more precise invalidation of checking the mod ref info of each instruction +// within the loop and LI. This has a complexity of O(N^2), so currently, this +// can be used as a diagnostic tool to identify limitation with the alias set +// tracker mechanism. +static bool pointerInvalidatedByLoop(LoadInst *LI, Loop *CurLoop, + AliasAnalysis *AA) { + // Don't look at nested loops. + if (CurLoop->begin() != CurLoop->end()) + return true; + + uint64_t Size = 0; + if (LI->getType()->isSized()) + Size = LI->getModule()->getDataLayout().getTypeStoreSize(LI->getType()); + + AAMDNodes AAInfo; + LI->getAAMetadata(AAInfo); + + int N = 0; + for (BasicBlock *BB : CurLoop->getBlocks()) + for (Instruction &I : *BB) { + if (N >= LICMN2Theshold) { + LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for " << *LI << "\n"); + return true; + } + N++; + auto Res = AA->getModRefInfo( + &I, MemoryLocation(LI->getPointerOperand(), Size, AAInfo)); + if (isModSet(Res)) { + LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for " << *LI << "\n"); + return true; + } + } + LLVM_DEBUG(dbgs() << "Aliasing okay for " << *LI << "\n"); + return false; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// Index: test/Transforms/LICM/invariant.start.ll =================================================================== --- test/Transforms/LICM/invariant.start.ll +++ test/Transforms/LICM/invariant.start.ll @@ -1,7 +1,9 @@ -; RUN: opt -licm -basicaa < %s -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=0 < %s -S | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 -; TODO: should be able to hoist both load and invariant.start +; TODO: By default (without the -licm-n2-threshold value), we should be able to hoist both load and invariant.start define void @test1(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test1( ; CHECK-LABEL: entry: @@ -9,6 +11,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test1( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) + entry: br label %loop @@ -20,7 +28,7 @@ br label %loop } -;; TODO: despite the loop varying invariant.start, we should be +;; TODO: By default, despite the loop varying invariant.start, we should be ;; able to hoist the load define void @test2(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test2( @@ -28,7 +36,12 @@ ; CHECK-LABEL: loop: ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv) ; CHECK: %val = load i32, i32* %ptr - + +; ALIAS-N2-LABEL: @test2( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv) entry: br label %loop @@ -41,7 +54,7 @@ br label %loop } -; Should be able to hoist since store doesn't alias +; By default, should be able to hoist since store doesn't alias define void @test3(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test3( ; CHECK-LABEL: entry: @@ -49,6 +62,11 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test3( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) entry: br label %loop @@ -72,6 +90,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test4( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: store i32 0, i32* %ptr +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) +; ALIAS-N2: %val = load i32, i32* %ptr entry: br label %loop @@ -93,6 +117,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test5( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: store i32 0, i32* %ptr +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) +; ALIAS-N2: %val = load i32, i32* %ptr entry: br label %loop