Index: include/llvm/Analysis/MemoryDependenceAnalysis.h =================================================================== --- include/llvm/Analysis/MemoryDependenceAnalysis.h +++ include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -397,7 +397,7 @@ /// critical edges. void invalidateCachedPredecessors(); - /// getPointerDependencyFrom - Return the instruction on which a memory + /// \brief Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases /// with read-only operations. If isLoad is false, this routine ignores /// may-aliases with reads from read-only locations. If possible, pass @@ -412,6 +412,17 @@ BasicBlock *BB, Instruction *QueryInst = nullptr); + MemDepResult getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst); + + + /// \brief This analysis looks for other + /// loads and stores with invariant.group metadata and the same + /// pointer operand. Returns Unknown if it finds nothing, and Def + /// if it can be assumed that two instructions load or store the same value. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI); + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that /// looks at a memory location for a load (specified by MemLocBase, Offs, /// and Size) and compares it against a load. 
If the specified load could Index: lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- lib/Analysis/MemoryDependenceAnalysis.cpp +++ lib/Analysis/MemoryDependenceAnalysis.cpp @@ -380,6 +380,52 @@ const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { + if (QueryInst != nullptr) + if (auto *LI = dyn_cast(QueryInst)) { + MemDepResult invariantGroupPointerDependency = + getInvariantGroupPointerDependency(LI); + if (!invariantGroupPointerDependency.isUnknown()) + return invariantGroupPointerDependency; + } + + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult MemoryDependenceAnalysis::getInvariantGroupPointerDependency( + LoadInst *LI) { + + Value *Ptr = LI->getPointerOperand(); + // It is not safe to walk the uses of a global value, because nothing + // guarantees that its use list won't change during iteration (the use list + // has no lock, and loads in other functions may be optimized at the same + // time). + if (isa(Ptr)) + return MemDepResult::getUnknown(); + + auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); + if (!InvariantGroupMD) + return MemDepResult::getUnknown(); + + for (Use &Us : Ptr->uses()) { + auto *U = dyn_cast(Us.getUser()); + if (!U || U == LI || !DT->dominates(U, LI)) + continue; + + // U dominates LI and uses the same pointer operand. If it is a load/store + // with the same invariant.group metadata, we can assume that the value + // pointed to by the pointer operand didn't change. 
+ if ((isa(U) || isa(U)) && + U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + return MemDepResult::getDef(U); + } + + return MemDepResult::getUnknown(); +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst) { + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -761,6 +761,7 @@ LLVMContext::MD_range, LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group }; combineMetadata(NLI, &LI, KnownIDs); }; Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -1669,6 +1669,9 @@ if (Tags) NewLoad->setAAMetadata(Tags); + if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + // Transfer DebugLoc. 
NewLoad->setDebugLoc(LI->getDebugLoc()); @@ -1852,6 +1855,7 @@ LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group }; combineMetadata(ReplInst, I, KnownIDs); } @@ -2106,6 +2110,8 @@ if (it != ReplaceWithConstMap.end()) { assert(!isa(Operand) && "Replacing constants with constants is invalid"); + DEBUG(dbgs() << "GVN replacing: " << *Operand << + " with " << *it->second << " in instruction " << *Instr << '\n'); Instr->setOperand(OpNum, it->second); Changed = true; } @@ -2461,7 +2467,6 @@ return Changed; } - bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function // (and incrementing BI before processing an instruction). @@ -2476,6 +2481,7 @@ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + if (!ReplaceWithConstMap.empty()) ChangedFunction |= replaceOperandsWithConsts(BI); ChangedFunction |= processInstruction(BI); Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -748,6 +748,7 @@ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, + LLVMContext::MD_invariant_group }; combineMetadata(C, cpy, KnownIDs); Index: lib/Transforms/Utils/Local.cpp =================================================================== --- lib/Transforms/Utils/Local.cpp +++ lib/Transforms/Utils/Local.cpp @@ -1378,7 +1378,8 @@ return true; } -void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs) { +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef KnownIDs) { SmallVector, 4> Metadata; K->dropUnknownNonDebugMetadata(KnownIDs); K->getAllMetadataOtherThanDebugLoc(Metadata); @@ -1416,8 +1417,21 @@ // Only set the !nonnull if it is present in both instructions. 
K->setMetadata(Kind, JMD); break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; } } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is a load or store - e.g. combining a bitcast with a + // load could produce a bitcast with invariant.group metadata - invalid IR. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now an instruction can only have one invariant.group. + if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa(K) || isa(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); + } unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -1099,7 +1099,8 @@ LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group }; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Index: lib/Transforms/Vectorize/BBVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/BBVectorize.cpp +++ lib/Transforms/Vectorize/BBVectorize.cpp @@ -3124,7 +3124,8 @@ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, - LLVMContext::MD_fpmath + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_group }; combineMetadata(K, H, KnownIDs); K->intersectOptionalDataWith(H); Index: test/Transforms/GVN/invariant.group.ll =================================================================== --- /dev/null +++ test/Transforms/GVN/invariant.group.ll @@ -0,0 +1,338 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + + +%struct.A = type { i32 (...)** } + +@_ZTV1A = available_externally 
unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +@unknownPtr = external global i8 + +; CHECK-LABEL: define i8 @simple() { +define i8 @simple() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load i8, i8* %ptr, !invariant.group !0 + %c = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable1() { +define i8 @optimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + + call void @foo(i8* %ptr2); call to use %ptr2 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable2() { +define i8 @optimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + store i8 13, i8* %ptr ; can't use this store with invariant.group + %a = load i8, i8* %ptr + call void @bar(i8 %a) ; call to use %a + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 42 + ret i8 %b +} + +; CHECK-LABEL: define i8 @unoptimizable1() { +define i8 @unoptimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define void @indirectLoads() { +define void @indirectLoads() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq 
i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + call void @llvm.assume(i1 %cmp.vtables) + + store %struct.A* %1, %struct.A** %a, align 8 + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + %6 = load %struct.A*, %struct.A** %a, align 8 + %7 = bitcast %struct.A* %6 to void (%struct.A*)*** + +; FIXME: This load could be merged with load of %vtable +; CHECK: %vtable2 = load {{.*}} !invariant.group + %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !2 + %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0 +; CHECK: %[[A:.*]] = {{.*}} %vtable2, align 8 + %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8 + +; CHECK: call void %[[A]]( + call void %8(%struct.A* %6) + %9 = load %struct.A*, %struct.A** %a, align 8 + %10 = bitcast %struct.A* %9 to void (%struct.A*)*** + + %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2 + %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0 +; CHECK: %[[B:.*]] = {{.*}} %vtable2, align 8 + %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8 +; CHECK: call void %[[B]]( + call void %11(%struct.A* %9) + + ret void +} + +; CHECK-LABEL: define void @combiningBitCastWithLoad() { +define void @combiningBitCastWithLoad() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load 
{{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + + store %struct.A* %1, %struct.A** %a, align 8 +; CHECK-NOT: !invariant.group + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + + ret void +} + + +; CHECK-LABEL:define void @loadCombine() { +define void @loadCombine() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK-NOT: load + %b = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %a) +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %b) + ret void +} + +; CHECK-LABEL: define void @loadCombine1() { +define void @loadCombine1() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group + %c = load i8, i8* %ptr +; CHECK-NOT: load + %d = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %c) +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %d) + ret void +} + +; CHECK-LABEL: define void @loadCombine2() { +define void @loadCombine2() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret 
void +} + +; CHECK-LABEL: define void @loadCombine3() { +define void @loadCombine3() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]] + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define i8 @unoptimizable2() { +define i8 @unoptimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable3() { +define i8 @unoptimizable3() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @getPointer(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable4() { +define i8 @unoptimizable4() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile1() { +define i8 @volatile1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile2() { +define i8 @volatile2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group 
!0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + + +; CHECK-LABEL: define i8 @fun() { +define i8 @fun() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change +; CHECK: call void @bar(i8 42) + call void @bar(i8 %a) + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) + + %unknownValue = load i8, i8* @unknownPtr +; FIXME: Can assume that %unknownValue == 42 +; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0 + store i8 %unknownValue, i8* %ptr, !invariant.group !0 + + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr +; CHECK: ret i8 %d + ret i8 %d +} + +declare void @foo(i8*) +declare void @bar(i8) +declare i8* @getPointer(i8*) +declare void @_ZN1A3fooEv(%struct.A*) +declare void @_ZN1AC1Ev(%struct.A*) +declare i8* @llvm.invariant.group.barrier(i8*) + +; Function Attrs: nounwind +declare void @llvm.assume(i1 %cmp.vtables) #0 + + +attributes #0 = { nounwind } +; CHECK: ![[OneMD]] = !{!"other ptr"} +!0 = !{!"magic ptr"} +!1 = !{!"other ptr"} +!2 = !{!"vtable_of_a"} \ No newline at end of file