diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -519,6 +519,22 @@
   // Returns a wider type among {Ty1, Ty2}.
   Type *getWiderType(Type *Ty1, Type *Ty2) const;
 
+  /// Return true if there exists a point in the program at which both
+  /// A and B could be operands to the same instruction.
+  /// SCEV expressions are generally assumed to correspond to instructions
+  /// which could exist in IR. In general, this requires that there exists
+  /// a use point in the program where all operands dominate the use.
+  ///
+  /// Example:
+  /// loop {
+  ///   if
+  ///     loop { v1 = load @global1; }
+  ///   else
+  ///     loop { v2 = load @global2; }
+  /// }
+  /// No SCEV with operands v1 and v2 can exist in this program.
+  bool instructionCouldExistWithOperands(const SCEV *A, const SCEV *B);
+
   /// Return true if the SCEV is a scAddRecExpr or it contains
   /// scAddRecExpr. The result will be cached in HasRecMap.
   bool containsAddRecurrence(const SCEV *S);
@@ -1947,7 +1963,13 @@
   const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S);
 
   /// Return a scope which provides an upper bound on the defining scope for
-  /// a SCEV with the operands in Ops.
+  /// a SCEV with the operands in Ops. The outparam Precise is set if the
+  /// bound found is a precise bound (i.e. it must be the defining scope).
+  const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+                                           bool &Precise);
+
+  /// Wrapper around the above for cases which don't care if the bound
+  /// is precise.
   const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops);
 
   /// Given two instructions in the same function, return true if we can
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3997,6 +3997,21 @@
   return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
 }
 
+bool ScalarEvolution::instructionCouldExistWithOperands(const SCEV *A,
+                                                        const SCEV *B) {
+  // For a valid use point to exist, the defining scope of one operand
+  // must dominate the other.
+  bool PreciseA, PreciseB;
+  auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
+  auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
+  if (!PreciseA || !PreciseB)
+    // Can't tell.
+    return false;
+  return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||
+         DT.dominates(ScopeB, ScopeA);
+}
+
+
 const SCEV *ScalarEvolution::getCouldNotCompute() {
   return CouldNotCompute.get();
 }
@@ -6612,7 +6627,9 @@
 }
 
 const Instruction *
-ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+                                       bool &Precise) {
+  Precise = true;
   // Do a bounded search of the def relation of the requested SCEVs.
   SmallSet<const SCEV *, 16> Visited;
   SmallVector<const SCEV *> Worklist;
@@ -6620,8 +6637,10 @@
     if (!Visited.insert(S).second)
       return;
     // Threshold of 30 here is arbitrary.
-    if (Visited.size() > 30)
+    if (Visited.size() > 30) {
+      Precise = false;
       return;
+    }
     Worklist.push_back(S);
   };
 
@@ -6644,6 +6663,12 @@
   return Bound ? Bound : &*F.getEntryBlock().begin();
 }
 
+const Instruction *
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
+  bool Discard;
+  return getDefiningScopeBound(Ops, Discard);
+}
+
 bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
                                                         const Instruction *B) {
   if (A->getParent() == B->getParent() &&
diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
--- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -23,6 +23,15 @@
 #include "llvm/InitializePasses.h"
 using namespace llvm;
 
+static bool canComputePointerDiff(ScalarEvolution &SE,
+                                  const SCEV *A, const SCEV *B) {
+  if (SE.getEffectiveSCEVType(A->getType()) !=
+      SE.getEffectiveSCEVType(B->getType()))
+    return false;
+
+  return SE.instructionCouldExistWithOperands(A, B);
+}
+
 AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
                                 const MemoryLocation &LocB, AAQueryInfo &AAQI) {
   // If either of the memory references is empty, it doesn't matter what the
@@ -41,8 +50,7 @@
 
   // If something is known about the difference between the two addresses,
   // see if it's enough to prove a NoAlias.
-  if (SE.getEffectiveSCEVType(AS->getType()) ==
-      SE.getEffectiveSCEVType(BS->getType())) {
+  if (canComputePointerDiff(SE, AS, BS)) {
     unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
     APInt ASizeInt(BitWidth, LocA.Size.hasValue()
                                  ? LocA.Size.getValue()
diff --git a/llvm/test/Analysis/ScalarEvolution/scev-aa.ll b/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
--- a/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -212,6 +212,130 @@
   ret void
 }
 
-; CHECK: 14 no alias responses
-; CHECK: 26 may alias responses
+; CHECK: Function: test_no_dom: 3 pointers, 0 call sites
+; CHECK: MayAlias: double* %addr1, double* %data
+; CHECK: NoAlias: double* %addr2, double* %data
+; CHECK: MayAlias: double* %addr1, double* %addr2
+
+; In this case, checking %addr1 and %addr2 involves two addrecs in two
+; different loops where neither dominates the other. This used to crash
+; because we expected the arguments to an AddExpr to have a strict
+; dominance order.
+define void @test_no_dom(double* %data) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 undef, label %subloop1, label %subloop2
+
+subloop1:
+  %iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
+  %iv1.next = add i32 %iv1, 1
+  %addr1 = getelementptr double, double* %data, i32 %iv1
+  store double 0.0, double* %addr1
+  %cmp1 = icmp slt i32 %iv1, 200
+  br i1 %cmp1, label %subloop1, label %for.latch
+
+subloop2:
+  %iv2 = phi i32 [400, %for.body], [%iv2.next, %subloop2]
+  %iv2.next = add i32 %iv2, 1
+  %addr2 = getelementptr double, double* %data, i32 %iv2
+  store double 0.0, double* %addr2
+  %cmp2 = icmp slt i32 %iv2, 600
+  br i1 %cmp2, label %subloop2, label %for.latch
+
+for.latch:
+  br label %for.body
+
+for.end:
+  ret void
+}
+
+declare double* @get_addr(i32 %i)
+
+; CHECK: Function: test_no_dom2: 3 pointers, 2 call sites
+; CHECK: MayAlias: double* %addr1, double* %data
+; CHECK: MayAlias: double* %addr2, double* %data
+; CHECK: MayAlias: double* %addr1, double* %addr2
+
+; In this case, checking %addr1 and %addr2 involves two addrecs in two
+; different loops where neither dominates the other. This is analogous
+; to test_no_dom, but involves SCEVUnknown as opposed to SCEVAddRecExpr.
+define void @test_no_dom2(double* %data) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 undef, label %subloop1, label %subloop2
+
+subloop1:
+  %iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
+  %iv1.next = add i32 %iv1, 1
+  %addr1 = call double* @get_addr(i32 %iv1)
+  store double 0.0, double* %addr1
+  %cmp1 = icmp slt i32 %iv1, 200
+  br i1 %cmp1, label %subloop1, label %for.latch
+
+subloop2:
+  %iv2 = phi i32 [400, %for.body], [%iv2.next, %subloop2]
+  %iv2.next = add i32 %iv2, 1
+  %addr2 = call double* @get_addr(i32 %iv2)
+  store double 0.0, double* %addr2
+  %cmp2 = icmp slt i32 %iv2, 600
+  br i1 %cmp2, label %subloop2, label %for.latch
+
+for.latch:
+  br label %for.body
+
+for.end:
+  ret void
+}
+
+
+; CHECK: Function: test_dom: 3 pointers, 0 call sites
+; CHECK: MayAlias: double* %addr1, double* %data
+; CHECK: NoAlias: double* %addr2, double* %data
+; CHECK: NoAlias: double* %addr1, double* %addr2
+
+; This is a variant of test_no_dom where the second subloop is
+; dominated by the first. As a result, we can nest the addrecs and
+; cancel out the %data base pointer.
+define void @test_dom(double* %data) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %subloop1
+
+subloop1:
+  %iv1 = phi i32 [0, %for.body], [%iv1.next, %subloop1]
+  %iv1.next = add i32 %iv1, 1
+  %addr1 = getelementptr double, double* %data, i32 %iv1
+  store double 0.0, double* %addr1
+  %cmp1 = icmp slt i32 %iv1, 200
+  br i1 %cmp1, label %subloop1, label %subloop2
+
+subloop2:
+  %iv2 = phi i32 [400, %subloop1], [%iv2.next, %subloop2]
+  %iv2.next = add i32 %iv2, 1
+  %addr2 = getelementptr double, double* %data, i32 %iv2
+  store double 0.0, double* %addr2
+  %cmp2 = icmp slt i32 %iv2, 600
+  br i1 %cmp2, label %subloop2, label %for.latch
+
+for.latch:
+  br label %for.body
+
+for.end:
+  ret void
+}
+
+; CHECK: 17 no alias responses
+; CHECK: 32 may alias responses
 ; CHECK: 18 must alias responses
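Note for reviewers: a minimal client-side sketch of how the new API composes with existing SCEV calls. This is not part of the patch; computePointerDiffOrNull is a hypothetical helper that mirrors the guard canComputePointerDiff() now performs before SCEV-AA forms the pointer difference via getMinusSCEV.

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;

  // Hypothetical helper (illustration only): form A - B only when some
  // instruction could have both A and B as operands.
  static const SCEV *computePointerDiffOrNull(ScalarEvolution &SE,
                                              const SCEV *A, const SCEV *B) {
    // The subtraction is only meaningful when both sides have the same
    // effective (pointer-width integer) type.
    if (SE.getEffectiveSCEVType(A->getType()) !=
        SE.getEffectiveSCEVType(B->getType()))
      return nullptr;
    // Refuse to mix operands whose defining scopes have no dominance
    // relation (e.g. addrecs in sibling loops); getMinusSCEV would
    // otherwise build an expression no instruction could compute.
    if (!SE.instructionCouldExistWithOperands(A, B))
      return nullptr;
    return SE.getMinusSCEV(A, B);
  }

On the sibling-loop tests above (test_no_dom, test_no_dom2) such a helper would return nullptr: neither address's defining scope dominates the other's, so instructionCouldExistWithOperands returns false, which is exactly the malformed-AddExpr case that used to crash.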