Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -531,6 +531,7 @@
     return LiveOnEntryDef.get();
   }
 
+  using InvariantGroupAccesses = SmallVector<MemoryUseOrDef *, 8>;
   using AccessList = iplist<MemoryAccess>;
 
   /// \brief Return the list of MemoryAccess's for a given basic block.
@@ -629,6 +630,12 @@
     return It == PerBlockAccesses.end() ? nullptr : It->second.get();
   }
 
+  const InvariantGroupAccesses *
+  getInvariantGroupBlockAccesses(const BasicBlock *BB) const {
+    auto It = PerBlockInvariantGroupAccesses.find(BB);
+    return It == PerBlockInvariantGroupAccesses.end() ? nullptr : &It->second;
+  }
+
 private:
   class CachingWalker;
   class OptimizeUses;
@@ -639,6 +646,8 @@
   void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
 
   using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
+  using UseOrDefAccessMap =
+      DenseMap<const BasicBlock *, InvariantGroupAccesses>;
 
   void
   determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks);
@@ -665,6 +674,7 @@
   // Memory SSA mappings
   DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess;
   AccessMap PerBlockAccesses;
+  UseOrDefAccessMap PerBlockInvariantGroupAccesses;
   std::unique_ptr<MemoryAccess> LiveOnEntryDef;
 
   // Domination mappings
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -1242,7 +1242,7 @@
 
 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
-  
+
   if (Res.second)
     Res.first->second = make_unique<AccessList>();
   return Res.first->second.get();
@@ -1285,6 +1285,21 @@
   void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                            SmallVectorImpl<MemoryAccess *> &,
                            DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
+
+  /// Maps a load/store pointer operand and its invariant group to the most
+  /// dominating defining access.
+  using MostDominatingInvariantGroupMap =
+      DenseMap<std::pair<Value *, MDNode *>, MemoryAccess *>;
+
+  struct InsertedInvariantGroupInfo {
+    Value *PointerOperand = nullptr;
+    MDNode *InvariantGroup = nullptr;
+    MemoryAccess *DefiningAccess = nullptr;
+  };
+
+  /// Optimizes memory uses based on !invariant.group metadata.
+  void optimizeInvariantGroupUsesInBlock(
+      const BasicBlock *, MostDominatingInvariantGroupMap &,
+      SmallVectorImpl<InsertedInvariantGroupInfo> &);
+
   MemorySSA *MSSA;
   MemorySSAWalker *Walker;
   AliasAnalysis *AA;
@@ -1319,6 +1334,8 @@
     BasicBlock *BackBlock = VersionStack.back()->getBlock();
     if (DT->dominates(BackBlock, BB))
       break;
+    // FIXME: This doesn't check whether the stack is empty; either fix that,
+    // or document that the liveOnEntry sentinel makes an empty stack impossible.
     while (VersionStack.back()->getBlock() == BackBlock)
       VersionStack.pop_back();
     ++PopEpoch;
@@ -1441,6 +1458,72 @@
   }
 }
 
+void MemorySSA::OptimizeUses::optimizeInvariantGroupUsesInBlock(
+    const BasicBlock *BB,
+    MostDominatingInvariantGroupMap &MostDominatingInvariantGroup,
+    SmallVectorImpl<InsertedInvariantGroupInfo> &InsertedInvariants) {
+
+  const auto *InvariantGroupAccesses = MSSA->getInvariantGroupBlockAccesses(BB);
+  if (!InvariantGroupAccesses)
+    return;
+
+  // Erase entries that were inserted in blocks that do not dominate BB.
+  BasicBlock *LastBlock = nullptr;
+  while (!InsertedInvariants.empty()) {
+    BasicBlock *const CurrentBlock =
+        InsertedInvariants.back().DefiningAccess->getBlock();
+    // Cache the last seen block to avoid redundant dominance queries.
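+    // Note: optimizeUses() visits blocks in dominator-tree preorder, so
+    // InsertedInvariants is ordered by dominance; once the top entry's block
+    // dominates BB, every entry below it dominates BB as well, and the
+    // remaining map entries stay valid.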
+    if (LastBlock != CurrentBlock) {
+      LastBlock = CurrentBlock;
+      if (DT->dominates(CurrentBlock, BB))
+        break;
+    }
+
+    InsertedInvariantGroupInfo GI = InsertedInvariants.back();
+    MostDominatingInvariantGroup.erase({GI.PointerOperand, GI.InvariantGroup});
+    InsertedInvariants.pop_back();
+  }
+
+  // Walk all loads/stores with !invariant.group, checking if there is a
+  // dominating memory access with the corresponding pointer operand and
+  // invariant group. If one is found, use it; if not, add this access to
+  // the map.
+  for (MemoryUseOrDef *MU : *InvariantGroupAccesses) {
+    auto *I = MU->getMemoryInst();
+    InsertedInvariantGroupInfo GI;
+    GI.InvariantGroup = I->getMetadata(LLVMContext::MD_invariant_group);
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      GI.PointerOperand = LI->getPointerOperand()->stripPointerCasts();
+    else if (auto *SI = dyn_cast<StoreInst>(I))
+      GI.PointerOperand = SI->getPointerOperand()->stripPointerCasts();
+    assert(GI.PointerOperand && GI.InvariantGroup &&
+           "Expected a load or store with nonnull !invariant.group metadata");
+
+    const auto It = MostDominatingInvariantGroup.find(
+        {GI.PointerOperand, GI.InvariantGroup});
+    // We found a dominating instruction with the same operand and group.
+    if (It != MostDominatingInvariantGroup.end()) {
+      // Can't optimize Defs.
+      if (isa<MemoryDef>(MU))
+        continue;
+
+      // Skip uses already defined by liveOnEntry so we don't pessimize them.
+      if (MSSA->isLiveOnEntryDef(MU->getDefiningAccess()))
+        continue;
+      MU->setDefiningAccess(It->second);
+    } else {
+      MemoryAccess *InsertingAccess;
+      if (isa<MemoryUse>(MU))
+        InsertingAccess = MU->getDefiningAccess();
+      else // MemoryDef
+        InsertingAccess = MU;
+
+      GI.DefiningAccess = InsertingAccess;
+      MostDominatingInvariantGroup[{GI.PointerOperand, GI.InvariantGroup}] =
+          InsertingAccess;
+      InsertedInvariants.push_back(GI);
+    }
+  }
+}
+
 /// Optimize uses to point to their actual clobbering definitions.
 void MemorySSA::OptimizeUses::optimizeUses() {
@@ -1451,15 +1534,21 @@
   };
 
   SmallVector<MemoryAccess *, 16> VersionStack;
+  // FIXME: DomTreeWorklist is never used; remove it.
   SmallVector<StackInfo, 16> DomTreeWorklist;
   DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
   VersionStack.push_back(MSSA->getLiveOnEntryDef());
+  MostDominatingInvariantGroupMap InvariantGroups;
+  SmallVector<InsertedInvariantGroupInfo, 16> InsertedInvariantGroups;
 
   unsigned long StackEpoch = 1;
   unsigned long PopEpoch = 1;
-  for (const auto *DomNode : depth_first(DT->getRootNode()))
+  for (const auto *DomNode : depth_first(DT->getRootNode())) {
     optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
                         LocStackInfo);
+    optimizeInvariantGroupUsesInBlock(DomNode->getBlock(), InvariantGroups,
+                                      InsertedInvariantGroups);
+  }
 }
 
 void MemorySSA::placePHINodes(
@@ -1487,6 +1576,10 @@
   }
 }
 
+static bool hasInvariantGroupMD(const Instruction &I) {
+  return I.getMetadata(LLVMContext::MD_invariant_group) != nullptr;
+}
+
 void MemorySSA::buildMemorySSA() {
   // We create an access to represent "live on entry", for things like
   // arguments or users of globals, where the memory they use is defined before
@@ -1505,6 +1598,7 @@
   // stream.
   SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
   SmallPtrSet<BasicBlock *, 32> DefUseBlocks;
+  SmallVector<MemoryUseOrDef *, 8> InvariantGroupAccesses;
 
   // Go through each block, figure out where defs occur, and chain together all
   // the accesses.
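+  // Loads and stores tagged with !invariant.group are also collected, per
+  // block, so OptimizeUses can later re-point them at the most dominating
+  // access with the same pointer operand and group.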
   for (BasicBlock &B : F) {
@@ -1520,11 +1614,19 @@
       if (!Accesses)
         Accesses = getOrCreateAccessList(&B);
       Accesses->push_back(MUD);
+
+      if (hasInvariantGroupMD(I))
+        InvariantGroupAccesses.push_back(MUD);
     }
     if (InsertIntoDef)
       DefiningBlocks.insert(&B);
     if (Accesses)
       DefUseBlocks.insert(&B);
+    if (!InvariantGroupAccesses.empty()) {
+      PerBlockInvariantGroupAccesses.try_emplace(
+          &B, std::move(InvariantGroupAccesses));
+      InvariantGroupAccesses.clear();
+    }
   }
 
   placePHINodes(DefiningBlocks, BBNumbers);
Index: test/Transforms/Util/MemorySSA/invariant-groups.ll
===================================================================
--- test/Transforms/Util/MemorySSA/invariant-groups.ll
+++ test/Transforms/Util/MemorySSA/invariant-groups.ll
@@ -26,7 +26,27 @@
   %2 = load i32, i32* %a32, align 4, !invariant.group !0
   ret i32 %2
 }
+
+; CHECK-LABEL: define i8 @differentGroups
+define i8 @differentGroups(i8* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+  store i8 0, i8* %a, align 4, !invariant.group !0
+; CHECK: 2 = MemoryDef(1)
+  store i8 0, i8* %a, align 4, !invariant.group !1
+; CHECK: 3 = MemoryDef(2)
+  call void @clobber8(i8* %a)
+; CHECK: MemoryUse(1)
+  %1 = load i8, i8* %a, !invariant.group !0
+; CHECK: MemoryUse(2)
+  %2 = load i8, i8* %a, !invariant.group !1
+; CHECK: MemoryUse(3)
+  %3 = load i8, i8* %a, !invariant.group !2
+; CHECK: MemoryUse(1)
+  %4 = load i8, i8* %a, !invariant.group !0
+  ret i8 %4
+}
+
+; CHECK-LABEL: define i32 @skipBarrier
 define i32 @skipBarrier(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -61,8 +81,7 @@
 ; CHECK-NEXT: store i32 1
   store i32 1, i32* @g, align 4
 
-; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry)
-; CHECK: MemoryUse(1)
+; CHECK: MemoryUse(liveOnEntry)
 ; CHECK-NEXT: %v3 = load i32
   %v3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %v2, %v3
@@ -90,8 +109,7 @@
 ; CHECK-NEXT: store i32 2
   store i32 2, i32* @g, align 4
 
-; FIXME: This can be changed to MemoryUse(2)
-; CHECK: MemoryUse(3)
+; CHECK: MemoryUse(2)
 ; CHECK-NEXT: %3 = load i32
   %3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %2, %3
@@ -110,15 +128,13 @@
   br i1 %a, label %Loop.Body, label %Loop.End
 
 Loop.Body:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %1 = load i32
   %1 = load i32, i32* %0, !invariant.group !0
   br i1 %a, label %Loop.End, label %Loop.Body
 
 Loop.End:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %2 = load
   %2 = load i32, i32* %0, align 4, !invariant.group !0
   br i1 %a, label %Ret, label %Loop.Body
@@ -144,8 +160,7 @@
 ; CHECK-NEXT: %0 = load i8
   %0 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(4)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %p, !invariant.group !0
 
@@ -158,19 +173,17 @@
 ; 5 = MemoryPhi({entry,2},{Loop.Body,3})
 ; CHECK: MemoryUse(5)
 ; CHECK-NEXT: %2 = load
-  %2 = load i8, i8* %after, align 4, !invariant.group !0
+  %2 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(5)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %3 = load
-  %3 = load i8, i8* %p, align 4, !invariant.group !0
+  %3 = load i8, i8* %p, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
 
 Ret:
   ret i8 %3
 }
 
-
 define i8 @loop3(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -192,8 +205,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(6)
-; CHECK: MemoryUse(3)
+; CHECK: MemoryUse(6)
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
@@ -203,8 +215,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(6)
-; CHECK: MemoryUse(4)
+; CHECK: MemoryUse(6)
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %after, !invariant.group !0
 
@@ -220,8 +231,7 @@
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(7)
-; CHECK: MemoryUse(5)
+; CHECK: MemoryUse(7)
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %after, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -248,27 +258,26 @@
   br label %Loop.Body
 
 Loop.Body:
 ; CHECK: 4 = MemoryPhi({Loop.Pre,2},{Loop.Body,3},{Loop.End,5})
-; CHECK-NEXT: MemoryUse(4)
+; CHECK-NEXT: MemoryUse(2)
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(4)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %p, !invariant.group !0
 
 ; CHECK: 3 = MemoryDef(4)
+; CHECK-NEXT: store
   store i8 4, i8* %after, !invariant.group !0
   br i1 undef, label %Loop.End, label %Loop.Body
 
 Loop.End:
 ; CHECK: 5 = MemoryPhi({entry,2},{Loop.Body,3})
-; CHECK-NEXT: MemoryUse(5)
+; CHECK-NEXT: MemoryUse(2)
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %after, align 4, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(5)
+; CHECK: MemoryUse(1)
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -276,6 +285,144 @@
 
 Ret:
   ret i8 %3
 }
+
+; CHECK-LABEL: define i8 @diamond
+define i8 @diamond(i8* %p) {
+entry:
+; CHECK: 1 = MemoryDef(liveOnEntry)
+  store i8 4, i8* %p, !invariant.group !0
+  %after = call i8* @llvm.invariant.group.barrier(i8* %p)
+; CHECK: 2 = MemoryDef(1)
+  call void @clobber8(i8* %p)
+  br i1 undef, label %First, label %Ret
+
+First: ; preds = %entry
+; CHECK: MemoryUse(2)
+  %0 = load i8, i8* %after, !invariant.group !0
+; CHECK: 3 = MemoryDef(2)
+  call void @clobber8(i8* %p)
+  br i1 undef, label %Second1, label %Second2
+
+Second1: ; preds = %First
+; CHECK: 4 = MemoryDef(3)
+  call void @clobber8(i8* %p)
+; CHECK: MemoryUse(4)
+  %1 = load i8, i8* %after, !invariant.group !1
+; CHECK: MemoryUse(1)
+  %2 = load i8, i8* %p, !invariant.group !0
+; CHECK: MemoryUse(4)
+  %3 = load i8, i8* %p, !invariant.group !1
+; CHECK: 5 = MemoryDef(4)
+  call void @clobber8(i8* %p)
+  br label %Diamond
+
+Second2: ; preds = %First
+; CHECK: 6 = MemoryDef(3)
+  call void @clobber8(i8* %p)
+; CHECK: MemoryUse(6)
+  %4 = load i8, i8* %after, !invariant.group !1
+; CHECK: MemoryUse(1)
+  %5 = load i8, i8* %p, !invariant.group !0
+; CHECK: MemoryUse(6)
+  %6 = load i8, i8* %p, !invariant.group !1
+; CHECK: 7 = MemoryDef(6)
+  call void @clobber8(i8* %p)
+  br label %Diamond
+
+Diamond: ; preds = %Second2, %Second1
+; CHECK: 11 = MemoryPhi({Second1,5},{Second2,7})
+; CHECK: 8 = MemoryDef(11)
+  call void @clobber8(i8* %p)
+; CHECK: MemoryUse(8)
+  %7 = load i8, i8* %after, !invariant.group !1
+; CHECK: MemoryUse(1)
+  %8 = load i8, i8* %p, !invariant.group !0
+; CHECK: MemoryUse(8)
+  %9 = load i8, i8* %p, !invariant.group !1
+; CHECK: MemoryUse(8)
+  %10 = load i8, i8* %after, !invariant.group !1
+; CHECK: MemoryUse(8)
+  %11 = load i8, i8* %after, !invariant.group !2
+; CHECK: 9 = MemoryDef(8)
+  call void @clobber8(i8* %p)
+  br label %Ret
+
+Ret: ; preds = %Diamond, %entry
+; CHECK: 12 = MemoryPhi({entry,2},{Diamond,9})
+; CHECK: 10 = MemoryDef(12)
+  call void @clobber8(i8* %p)
+; CHECK: MemoryUse(10)
+  %12 = load i8, i8* %after, !invariant.group !2
+; CHECK: MemoryUse(10)
+  %13 = load i8, i8* %after, !invariant.group !1
+; CHECK: MemoryUse(1)
+  %14 = load i8, i8* %p, !invariant.group !0
+; CHECK: MemoryUse(10)
+  %15 = load i8, i8* %p, !invariant.group !1
+  ret i8 %15
+}
+
+; From NewGVN/invariant.group.ll
+@unknownPtr = external global i8
+; CHECK-LABEL: define void @testGlobal() {
+define void @testGlobal() {
+; CHECK: MemoryUse(liveOnEntry)
+  %a = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: 1 = MemoryDef(liveOnEntry)
+  call void @clobber8(i8* @unknownPtr)
+; CHECK: MemoryUse(liveOnEntry)
+  %1 = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK: 2 = MemoryDef(1)
+  call void @clobber8(i8* @unknownPtr)
+  %b0 = bitcast i8* @unknownPtr to i1*
+; CHECK: 3 = MemoryDef(2)
+  call void @clobber8(i8* @unknownPtr)
+; CHECK: MemoryUse(liveOnEntry)
+  %2 = load i1, i1* %b0, !invariant.group !0
+; CHECK: 4 = MemoryDef(3)
+  call void @clobber8(i8* @unknownPtr)
+; CHECK: MemoryUse(liveOnEntry)
+  %3 = load i1, i1* %b0, !invariant.group !0
+  ret void
+}
+
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+declare void @_ZN1A3fooEv(%struct.A*)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare i8* @getPointer(i8*)
+
+; CHECK-LABEL: define void @combiningBitCastWithLoad() {
+define void @combiningBitCastWithLoad() {
+  %a = alloca %struct.A*, align 8
+  %s2 = bitcast %struct.A** %a to i8*
+; CHECK: 1 = MemoryDef(liveOnEntry)
+  call void @clobber8(i8* null)
+  %x = bitcast i8* %s2 to i1*
+; CHECK: 2 = MemoryDef(1)
+  store i1 1, i1* %x, !invariant.group !0
+  %1 = bitcast i8* %s2 to %struct.A*
+; CHECK: 3 = MemoryDef(2)
+  call void @_ZN1AC1Ev(%struct.A* %1)
+  %2 = bitcast %struct.A* %1 to i8***
+; CHECK: MemoryUse(2)
+  %vtable = load i8**, i8*** %2, align 8, !invariant.group !0
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+; CHECK: 4 = MemoryDef(3)
+  store %struct.A* %1, %struct.A** %a, align 8
+; CHECK: MemoryUse(4)
+  %3 = load %struct.A*, %struct.A** %a, align 8
+  %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+; CHECK: MemoryUse(4)
+  %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !0
+  %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+; CHECK: MemoryUse(4)
+  %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+; CHECK: 5 = MemoryDef(4)
+  call void %5(%struct.A* %3)
+  ret void
+}
 
 declare i8* @llvm.invariant.group.barrier(i8*)
 declare void @clobber(i32*)
@@ -283,3 +430,5 @@
 
 !0 = !{!"group1"}
+!1 = !{!"other"}
+!2 = !{!"other2"}
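
Note: a minimal sketch of how client code might consume the new getInvariantGroupBlockAccesses() accessor added above. The helper function below is hypothetical and for illustration only; it assumes nothing beyond the accessor itself and MemorySSA's existing printing operators.

  // Illustrative only -- not part of this patch. For each block that has
  // loads/stores tagged with !invariant.group, print every collected access
  // next to the instruction it annotates.
  #include "llvm/IR/Function.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/MemorySSA.h"

  using namespace llvm;

  static void printInvariantGroupAccesses(Function &F, MemorySSA &MSSA) {
    for (BasicBlock &BB : F) {
      // A null result means the block has no !invariant.group loads or stores.
      const auto *Accesses = MSSA.getInvariantGroupBlockAccesses(&BB);
      if (!Accesses)
        continue;
      errs() << "Block " << BB.getName() << ":\n";
      for (const MemoryUseOrDef *MUD : *Accesses)
        errs() << "  " << *MUD << " for " << *MUD->getMemoryInst() << "\n";
    }
  }

The accessor returns null rather than an empty list for blocks without tagged accesses, so callers must check the pointer before iterating.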