Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -89,6 +89,13 @@ cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); +// Default value of zero implies we use the regular alias set tracker mechanism +// instead of the cross product using AA to identify aliasing of the memory +// location we are interested in. +static cl::opt<int> +LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), + cl::desc("How many instruction to cross product using AA")); + static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, @@ -105,8 +112,10 @@ const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI = nullptr); -static bool isInvalidatedByLoop(const MemoryLocation &MemLoc, - AliasSetTracker *CurAST); +static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, + AliasSetTracker *CurAST, Loop *CurLoop, + AliasAnalysis *AA); + static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, @@ -628,7 +637,16 @@ if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; - bool Invalidated = isInvalidatedByLoop(MemoryLocation::get(LI), CurAST); + // Don't hoist loads which have may-aliased stores in loop. + uint64_t Size = 0; + if (LI->getType()->isSized()) + Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); + + AAMDNodes AAInfo; + LI->getAAMetadata(AAInfo); + + bool Invalidated = pointerInvalidatedByLoop( + MemoryLocation(LI->getOperand(0), Size, AAInfo), CurAST, CurLoop, AA); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. 
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -669,10 +687,9 @@ if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { for (Value *Op : CI->arg_operands()) if (Op->getType()->isPointerTy() && - isInvalidatedByLoop(MemoryLocation(Op, - MemoryLocation::UnknownSize, - AAMDNodes()), - CurAST)) + pointerInvalidatedByLoop( + MemoryLocation(Op, MemoryLocation::UnknownSize, AAMDNodes()), + CurAST, CurLoop, AA)) return false; return true; } @@ -1569,13 +1586,56 @@ LICM.getLoopToAliasSetMap().erase(L); } -/// Return true if the body of this loop may store into the memory -/// location pointed to by V. -/// -static bool isInvalidatedByLoop(const MemoryLocation &MemLoc, - AliasSetTracker *CurAST) { - // Check to see if any of the basic blocks in CurLoop invalidate *V. - return CurAST->getAliasSetFor(MemLoc).isMod(); +/// Return true if the body of CurLoop may modify the memory location MemLoc. +/// The alias set tracker CurAST is consulted first; only when it reports a +/// modification and -licm-n2-threshold is non-zero is the answer refined by +/// querying AA for every instruction in the loop (at most the threshold +/// number of instructions, and never for loops containing subloops). +static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, + AliasSetTracker *CurAST, Loop *CurLoop, + AliasAnalysis *AA) { + // First check to see if any of the basic blocks in CurLoop invalidate MemLoc. + bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod(); + + if (!isInvalidatedAccordingToAST || !LICMN2Theshold) + return isInvalidatedAccordingToAST; + + // Check with a diagnostic analysis if we can refine the information above. + // This is to identify the limitations of using the AST. + // The alias set mechanism used by LICM has a major weakness in that it + // combines all things which may alias into a single set *before* asking + // modref questions. As a result, a single readonly call within a loop will + // collapse all loads and stores into a single alias set and report + // invalidation if the loop contains any store. For example, readonly calls + // with deopt states have this form and create a general alias set with all + // loads and stores. 
In order to get any LICM in loops containing possible + // deopt states we need a more precise invalidation of checking the mod ref + // info of each instruction within the loop and LI. This has a complexity of + // O(N^2), so currently, it is used only as a diagnostic tool since the + // default value of -licm-n2-threshold is zero. + + // Don't look at nested loops. + if (CurLoop->begin() != CurLoop->end()) + return true; + + int N = 0; + for (BasicBlock *BB : CurLoop->getBlocks()) + for (Instruction &I : *BB) { + if (N >= LICMN2Theshold) { + LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for " + << *(MemLoc.Ptr) << "\n"); + return true; + } + N++; + auto Res = AA->getModRefInfo(&I, MemLoc); + if (isModSet(Res)) { + LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for " + << *(MemLoc.Ptr) << "\n"); + return true; + } + } + LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n"); + return false; } /// Little predicate that returns true if the specified basic block is in Index: llvm/trunk/test/Transforms/LICM/argmemonly-call.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/argmemonly-call.ll +++ llvm/trunk/test/Transforms/LICM/argmemonly-call.ll @@ -1,5 +1,8 @@ -; RUN: opt -S -basicaa -licm %s | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 %s | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 + declare i32 @foo() readonly argmemonly nounwind declare i32 @foo2() readonly nounwind declare i32 @bar(i32* %loc2) readonly 
argmemonly nounwind @@ -68,3 +71,27 @@ store i32 %res, i32* %loc br label %loop } + +declare i32 @foo_new(i32*) readonly +; With the default AST mechanism used by LICM for alias analysis, +; we clump foo_new with bar. +; With the N2 Alias analysis diagnostic tool, we are able to hoist the +; argmemonly bar call out of the loop. + +define void @test5(i32* %loc2, i32* noalias %loc) { +; ALIAS-N2-LABEL: @test5 +; ALIAS-N2: @bar +; ALIAS-N2-LABEL: loop: + +; CHECK-LABEL: @test5 +; CHECK-LABEL: loop: +; CHECK: @bar + br label %loop + +loop: + %res1 = call i32 @bar(i32* %loc2) + %res = call i32 @foo_new(i32* %loc2) + store volatile i32 %res1, i32* %loc + br label %loop +} + Index: llvm/trunk/test/Transforms/LICM/invariant.start.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/invariant.start.ll +++ llvm/trunk/test/Transforms/LICM/invariant.start.ll @@ -1,7 +1,9 @@ -; RUN: opt -licm -basicaa < %s -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=0 < %s -S | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 -; TODO: should be able to hoist both load and invariant.start +; TODO: By default (without the -licm-n2-threshold value), we should be able to hoist both load and invariant.start define void @test1(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test1( ; CHECK-LABEL: entry: @@ -9,6 +11,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test1( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: 
%val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) + entry: br label %loop @@ -20,7 +28,7 @@ br label %loop } -;; TODO: despite the loop varying invariant.start, we should be +;; TODO: By default, despite the loop varying invariant.start, we should be ;; able to hoist the load define void @test2(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test2( @@ -28,7 +36,12 @@ ; CHECK-LABEL: loop: ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv) ; CHECK: %val = load i32, i32* %ptr - + +; ALIAS-N2-LABEL: @test2( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %piv) entry: br label %loop @@ -41,7 +54,7 @@ br label %loop } -; Should be able to hoist since store doesn't alias +; By default, should be able to hoist since store doesn't alias define void @test3(i1 %cond, i32* %ptr) { ; CHECK-LABEL: @test3( ; CHECK-LABEL: entry: @@ -49,6 +62,11 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test3( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) entry: br label %loop @@ -72,6 +90,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test4( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: store i32 0, i32* %ptr +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) +; ALIAS-N2: %val = load i32, i32* %ptr entry: br label %loop @@ -93,6 +117,12 @@ ; CHECK: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) ; CHECK: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: @test5( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: store i32 0, i32* %ptr +; ALIAS-N2: call {}* @llvm.invariant.start.p0i32(i64 4, i32* %ptr) +; 
ALIAS-N2: %val = load i32, i32* %ptr entry: br label %loop Index: llvm/trunk/test/Transforms/LICM/read-only-calls.ll =================================================================== --- llvm/trunk/test/Transforms/LICM/read-only-calls.ll +++ llvm/trunk/test/Transforms/LICM/read-only-calls.ll @@ -0,0 +1,85 @@ +; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 %s | FileCheck %s +; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 + +; We should be able to hoist loads in presence of read only calls and stores +; that do not alias. + +; Since LICM uses the AST mechanism for alias analysis, we will clump +; together all loads and stores in one set along with the read-only call. +; This prevents hoisting load that doesn't alias with any other memory +; operations. + +declare void @foo(i64, i32*) readonly + +; hoist the load out with the n2-threshold +; since it doesn't alias with the store. +; default AST mechanism clumps all memory locations in one set because of the +; readonly call +define void @test1(i32* %ptr) { +; CHECK-LABEL: @test1( +; CHECK-LABEL: entry: +; CHECK-LABEL: loop: +; CHECK: %val = load i32, i32* %ptr + +; ALIAS-N2-LABEL: @test1( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +entry: + br label %loop + +loop: + %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ] + %val = load i32, i32* %ptr + call void @foo(i64 4, i32* %ptr) + %p2 = getelementptr i32, i32* %ptr, i32 1 + store volatile i32 0, i32* %p2 + %x.inc = add i32 %x, %val + br label %loop +} + +; can hoist out load with the default AST and the alias analysis mechanism. 
+define void @test2(i32* %ptr) { +; CHECK-LABEL: @test2( +; CHECK-LABEL: entry: +; CHECK: %val = load i32, i32* %ptr +; CHECK-LABEL: loop: + +; ALIAS-N2-LABEL: @test2( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2: %val = load i32, i32* %ptr +; ALIAS-N2-LABEL: loop: +entry: + br label %loop + +loop: + %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ] + %val = load i32, i32* %ptr + call void @foo(i64 4, i32* %ptr) + %x.inc = add i32 %x, %val + br label %loop +} + +; cannot hoist load since not guaranteed to execute +define void @test3(i32* %ptr) { +; CHECK-LABEL: @test3( +; CHECK-LABEL: entry: +; CHECK-LABEL: loop: +; CHECK: %val = load i32, i32* %ptr + +; ALIAS-N2-LABEL: @test3( +; ALIAS-N2-LABEL: entry: +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: %val = load i32, i32* %ptr +entry: + br label %loop + +loop: + %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ] + call void @foo(i64 4, i32* %ptr) + %val = load i32, i32* %ptr + %x.inc = add i32 %x, %val + br label %loop +}