diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -32,6 +32,7 @@
 class DominatorTree;
 class PHITransAddr;
 class PhiValues;
+class LoopInfo;
 
 /// A memory dependence query can return one of three different answers.
 class MemDepResult {
@@ -349,6 +350,7 @@
   DominatorTree &DT;
   PhiValues &PV;
   PredIteratorCache PredCache;
+  LoopInfo &Loops;
 
   unsigned DefaultBlockScanLimit;
 
@@ -359,8 +361,9 @@
 public:
   MemoryDependenceResults(AAResults &AA, AssumptionCache &AC,
                           const TargetLibraryInfo &TLI, DominatorTree &DT,
-                          PhiValues &PV, unsigned DefaultBlockScanLimit)
-      : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV),
+                          PhiValues &PV, LoopInfo &Loops,
+                          unsigned DefaultBlockScanLimit)
+      : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV), Loops(Loops),
         DefaultBlockScanLimit(DefaultBlockScanLimit) {}
 
   /// Handle invalidation in the new PM.
@@ -457,6 +460,13 @@
                                  Instruction *QueryInst, unsigned *Limit,
                                  BatchAAResults &BatchAA);
 
+  /// Returns true when \p QueryInst and \p DepInst carry the same
+  /// llvm.access.group node and that node is listed in the
+  /// llvm.loop.parallel_accesses metadata of the loop found for \p BB.
+  /// Returns false when \p QueryInst is null.
+  bool isSameLoopAccessGroup(Instruction *QueryInst, Instruction *DepInst,
+                             BasicBlock *BB);
+
   /// This analysis looks for other loads and stores with invariant.group
   /// metadata and the same pointer operand.
Returns Unknown if it does not
   /// find anything, and Def if it can be assumed that 2 instructions load or
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/PHITransAddr.h"
@@ -366,6 +367,34 @@
   return MemDepResult::getNonLocal();
 }
 
+bool MemoryDependenceResults::isSameLoopAccessGroup(Instruction *QueryInst,
+                                                    Instruction *DepInst,
+                                                    BasicBlock *BB) {
+  if (!QueryInst)
+    return false;
+
+  // Both instructions must carry the very same access-group node.
+  MDNode *MD1 = QueryInst->getMetadata(LLVMContext::MD_access_group);
+  MDNode *MD2 = DepInst->getMetadata(LLVMContext::MD_access_group);
+  if (!MD1 || MD1 != MD2)
+    return false;
+
+  // The group must also be listed in the "llvm.loop.parallel_accesses"
+  // option of the loop that LoopInfo reports for BB.
+  // NOTE(review): per LangRef this metadata only excludes loop-carried
+  // dependences; confirm callers may treat a match as NoAlias.
+  const Loop *L = Loops.getLoopFor(BB);
+  MDNode *ParallelAccesses =
+      L ? findOptionMDForLoop(L, "llvm.loop.parallel_accesses") : nullptr;
+  if (!ParallelAccesses)
+    return false;
+  // Operand 0 is the option name; the remaining operands are access groups.
+  for (const MDOperand &PMD : drop_begin(ParallelAccesses->operands(), 1))
+    if (cast<MDNode>(PMD.get()) == MD1)
+      return true;
+  return false;
+}
+
 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
@@ -512,6 +541,10 @@
       // If we found a pointer, check if it could be the same as our pointer.
AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
 
+      // Refine only an inconclusive MayAlias; proven overlaps are kept.
+      if (R == AliasResult::MayAlias &&
+          isSameLoopAccessGroup(QueryInst, LI, BB))
+        R = AliasResult::NoAlias;
       if (isLoad) {
         if (R == AliasResult::NoAlias)
           continue;
@@ -579,6 +612,10 @@
       // If we found a pointer, check if it could be the same as our pointer.
       AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
 
+      // Refine only an inconclusive MayAlias; proven overlaps are kept.
+      if (R == AliasResult::MayAlias &&
+          isSameLoopAccessGroup(QueryInst, SI, BB))
+        R = AliasResult::NoAlias;
       if (R == AliasResult::NoAlias)
         continue;
       if (R == AliasResult::MustAlias)
@@ -1739,7 +1776,9 @@
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &PV = AM.getResult<PhiValuesAnalysis>(F);
-  return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
+  auto &Loops = AM.getResult<LoopAnalysis>(F);
+  return MemoryDependenceResults(AA, AC, TLI, DT, PV, Loops,
+                                 DefaultBlockScanLimit);
 }
 
 char MemoryDependenceWrapperPass::ID = 0;
@@ -1751,6 +1790,7 @@
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
                     "Memory Dependence Analysis", false, true)
 
@@ -1769,6 +1809,7 @@
   AU.addRequired<AssumptionCacheTracker>();
   AU.addRequired<DominatorTreeWrapperPass>();
   AU.addRequired<PhiValuesWrapperPass>();
+  AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequiredTransitive<AAResultsWrapperPass>();
   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
 }
@@ -1785,7 +1826,8 @@
   if (Inv.invalidate<AAManager>(F, PA) ||
       Inv.invalidate<AssumptionAnalysis>(F, PA) ||
       Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
-      Inv.invalidate<PhiValuesAnalysis>(F, PA))
+      Inv.invalidate<PhiValuesAnalysis>(F, PA) ||
+      Inv.invalidate<LoopAnalysis>(F, PA))
     return true;
 
   // Otherwise this analysis result remains valid.
@@ -1802,6 +1844,7 @@
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
-  MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
+  auto &Loops = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  MemDep.emplace(AA, AC, TLI, DT, PV, Loops, BlockScanLimit);
   return false;
 }
diff --git a/llvm/test/Transforms/GVN/access_group.ll b/llvm/test/Transforms/GVN/access_group.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/access_group.ll
@@ -0,0 +1,41 @@
+; REQUIRES: asserts
+; RUN: opt -gvn -S < %s -debug -o /dev/null 2>&1 | FileCheck %s
+
+; Every memory access in the loop carries access group !0, which the loop's
+; llvm.loop.parallel_accesses metadata lists. The test expects GVN to report
+; removing the reloads of %gep1 and %gep2 that follow the store to %gepout.
+; NOTE(review): both expected lines print %2 - presumably the unnamed values
+; are renumbered once the first load is erased; confirm.
+
+; CHECK: GVN removed: %2 = load double, double* %gep1, align 8, !llvm.access.group !0
+; CHECK: GVN removed: %2 = load double, double* %gep2, align 8, !llvm.access.group !0
+
+define void @foo(double* %out, double* %in1, double* %in2, i64 %n) {
+entry:
+  br label %omp.inner.for.body
+
+omp.inner.for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %omp.inner.for.body ]
+  %gep1 = getelementptr inbounds double, double* %in1, i64 %iv
+  %gep2 = getelementptr inbounds double, double* %in2, i64 %iv
+  %0 = load double, double* %gep1, !llvm.access.group !0
+  %1 = load double, double* %gep2, !llvm.access.group !0
+  %mul = fmul contract double %0, %1
+  %gepout = getelementptr inbounds double, double* %out, i64 %iv
+  store double %mul, double* %gepout, !llvm.access.group !0
+  %2 = load double, double* %gep1, !llvm.access.group !0
+  %3 = load double, double* %gep2, !llvm.access.group !0
+  %add = fadd contract double %2, %3
+  store double %add, double* %gepout, !llvm.access.group !0
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !1
+
+simd.if.end:
+  ret void
+}
+
+!0 = distinct !{}
+!1 = distinct !{!1, !2, !3}
+!2 = !{!"llvm.loop.parallel_accesses", !0}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}