Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -703,14 +703,50 @@
   return Result;
 }
 
+/// This does one-way checks to see if Use could theoretically be hoisted above
+/// MaybeDef. This will not check the other way around.
+///
+/// This assumes that, for the purposes of MemorySSA, Use comes directly after
+/// MaybeDef, with no potentially clobbering operations in between them.
+/// Here, potentially clobbering ops are memory barriers, aliased stores, etc.
+static bool canUseBeReorderedAboveDef(const Instruction *Use,
+                                      const Instruction *MaybeDef) {
+  // We only check for load-load clobbers; everything else is AA's problem. :)
+  if (!isa<LoadInst>(MaybeDef) || !isa<LoadInst>(Use))
+    return false;
+
+  auto *LoadDef = cast<LoadInst>(MaybeDef);
+  auto *LoadUse = cast<LoadInst>(Use);
+  // Volatile operations can never be reordered with other volatile operations.
+  if (LoadDef->isVolatile() && LoadUse->isVolatile())
+    return false;
+
+  // ...But volatile operations can be reordered with non-volatile operations.
+  //
+  // Remember that volatile ordering and atomic ordering are unrelated, so
+  // from this point on, whether an access is volatile is irrelevant.
+  //
+  // Note that it's perfectly okay to reorder two monotonic (read:
+  // std::memory_order_relaxed) loads, even if they alias. It's also technically
+  // okay to move an acquire load above a monotonic load, but we represent both
+  // as MemoryDefs, so we can't 'optimize' them. So, we just test for monotonic.
+  return LoadDef->getOrdering() <= Monotonic &&
+         LoadUse->getOrdering() <= Monotonic;
+}
+
 bool CachingMemorySSAWalker::instructionClobbersQuery(
     const MemoryDef *MD, UpwardsMemoryQuery &Q,
     const MemoryLocation &Loc) const {
   Instruction *DefMemoryInst = MD->getMemoryInst();
   assert(DefMemoryInst && "Defining instruction not actually an instruction");
 
-  if (!Q.IsCall)
+  if (!Q.IsCall) {
+    // If two memory operations can be reordered, then they can't clobber each
+    // other.
+    if (canUseBeReorderedAboveDef(Q.Inst, DefMemoryInst))
+      return false;
     return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
+  }
 
   // If this is a call, mark it for caching
   if (ImmutableCallSite(DefMemoryInst))
@@ -898,6 +934,17 @@
   if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
     return CacheResult;
 
+  // If the memory can't be changed, then loads of the memory can't be
+  // clobbered.
+  //
+  // TODO(gbiv): We should handle invariant groups, as well. It's a bit harder,
+  // because we need to pay close attention to invariant group barriers.
+  if (isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+                           AA->pointsToConstantMemory(I))) {
+    doCacheInsert(StartingAccess, MSSA->getLiveOnEntryDef(), Q, Q.StartingLoc);
+    return MSSA->getLiveOnEntryDef();
+  }
+
   // Start with the thing we already think clobbers this location
   MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
Index: test/Transforms/Util/MemorySSA/atomic-clobber.ll
===================================================================
--- test/Transforms/Util/MemorySSA/atomic-clobber.ll
+++ test/Transforms/Util/MemorySSA/atomic-clobber.ll
@@ -2,7 +2,8 @@
 ;
 ; Ensures that atomic loads count as MemoryDefs
-define i32 @foo(i32* %a, i32* %b) {
+; CHECK-LABEL: define void @foo
+define void @foo(i32* %a, i32* %b) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 4
   store i32 4, i32* %a, align 4
@@ -12,6 +13,68 @@
 ; CHECK: MemoryUse(2)
 ; CHECK-NEXT: %2 = load i32
   %2 = load i32, i32* %a, align 4
-  %3 = add i32 %1, %2
-  ret i32 %3
+  ret void
+}
+
+; CHECK-LABEL: define void @bar
+define void @bar(i32* %a) {
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %1 = load atomic i32
+  %1 = load atomic i32, i32* %a unordered, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %2 = load atomic i32
+  %2 = load atomic i32, i32* %a monotonic, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %3 = load atomic i32
+  %3 = load atomic i32, i32* %a acquire, align 4
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: %4 = load atomic i32
+  %4 = load atomic i32, i32* %a seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: define void @baz
+define void @baz(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %1 = load atomic i32
+  %1 = load atomic i32, i32* %a acquire, align 4
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %2 = load atomic i32
+  %2 = load atomic i32, i32* %a unordered, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %3 = load atomic i32
+  %3 = load atomic i32, i32* %a monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: define void @fences
+define void @fences(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: fence acquire
+  fence acquire
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* %a
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: fence release
+  fence release
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %a
+
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: fence acq_rel
+  fence acq_rel
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %3 = load i32
+  %3 = load i32, i32* %a
+
+; CHECK: 4 = MemoryDef(3)
+; CHECK-NEXT: fence seq_cst
+  fence seq_cst
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %4 = load i32
+  %4 = load i32, i32* %a
+  ret void
 }
Index: test/Transforms/Util/MemorySSA/constant-memory.ll
===================================================================
--- /dev/null
+++ test/Transforms/Util/MemorySSA/constant-memory.ll
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+;
+; Things that BasicAA can prove point to constant memory should be
+; liveOnEntry, as well.
+
+declare void @clobberAllTheThings()
+
+@str = private unnamed_addr constant [2 x i8] c"hi"
+
+define i8 @foo() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: call void @clobberAllTheThings()
+  call void @clobberAllTheThings()
+  %1 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 0
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %2 = load i8
+  %2 = load i8, i8* %1, align 1
+  %3 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 1
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %4 = load i8
+  %4 = load i8, i8* %3, align 1
+  %5 = add i8 %2, %4
+  ret i8 %5
+}
+
+define i8 @select(i1 %b) {
+  %1 = alloca i8, align 1
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0
+  store i8 0, i8* %1, align 1
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobberAllTheThings()
+  call void @clobberAllTheThings()
+  %2 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 0
+  %3 = select i1 %b, i8* %2, i8* %1
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %4 = load i8
+  %4 = load i8, i8* %3, align 1
+  ret i8 %4
+}
Index: test/Transforms/Util/MemorySSA/load-invariant.ll
===================================================================
--- test/Transforms/Util/MemorySSA/load-invariant.ll
+++ test/Transforms/Util/MemorySSA/load-invariant.ll
@@ -1,11 +1,7 @@
-; XFAIL: *
 ; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
 ;
 ; Invariant loads should be considered live on entry, because, once the
 ; location is known to be dereferenceable, the value can never change.
-;
-; Currently XFAILed because this optimization was held back from the initial
-; commit.
 
 @g = external global i32
 
@@ -21,4 +17,19 @@
   ret i32 %1
 }
 
+define i32 @bar(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: call void @clobberAllTheThings()
+  call void @clobberAllTheThings()
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %1 = load atomic i32
+  %1 = load atomic i32, i32* %a acquire, align 4, !invariant.load !0
+
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %a, align 4
+  ret i32 %2
+}
+
 !0 = !{}
Index: test/Transforms/Util/MemorySSA/volatile-clobber.ll
===================================================================
--- test/Transforms/Util/MemorySSA/volatile-clobber.ll
+++ test/Transforms/Util/MemorySSA/volatile-clobber.ll
@@ -19,3 +19,25 @@
   %4 = add i32 %3, %2
   ret i32 %4
 }
+
+define i32 @noclobber(i32* %vol, i32* %nonvol) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %1 = load volatile i32
+  %1 = load volatile i32, i32* %vol, align 4
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %nonvol, align 4
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
+
+define i32 @ordered_clobber(i32* %vol, i32* %nonvol) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %1 = load atomic volatile i32
+  %1 = load atomic volatile i32, i32* %vol acquire, align 4
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %nonvol, align 4
+  %3 = add i32 %1, %2
+  ret i32 %3
+}
Index: test/Transforms/Util/invariant-groups.ll
===================================================================
--- /dev/null
+++ test/Transforms/Util/invariant-groups.ll
@@ -0,0 +1,30 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+;
+; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
+; invariant.group.barrier intrinsics entirely. We'll need to pay attention to
+; them when/if we decide to support invariant groups.
+
+@g = external global i32
+
+define i32 @foo(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* %a, align 4, !llvm.invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 1
+  store i32 1, i32* @g, align 4
+
+  %1 = bitcast i32* %a to i8*
+  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+  %a32 = bitcast i8* %a8 to i32*
+
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %a32, align 4, !llvm.invariant.group !0
+  ret i32 %2
+}
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"group1"}
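
Note (not part of the patch): a minimal standalone C++ sketch of the reordering rule that canUseBeReorderedAboveDef encodes, using std::memory_order_relaxed loads as the source-level analogue of LLVM's monotonic ordering. The names (A, B, V1, V2, relaxed_loads, volatile_loads) are illustrative only and assume nothing beyond standard <atomic>.

#include <atomic>

std::atomic<int> A{0};
std::atomic<int> B{0};
volatile int V1 = 0;
volatile int V2 = 0;

// Two relaxed (LLVM: monotonic) loads may be freely reordered with each
// other, so MemorySSA need not treat the first as clobbering the second.
int relaxed_loads() {
  int X = A.load(std::memory_order_relaxed);
  int Y = B.load(std::memory_order_relaxed);
  return X + Y;
}

// Two volatile loads must keep their relative program order, which is why
// the helper bails out when both the def and the use are volatile.
int volatile_loads() {
  int X = V1;
  int Y = V2;
  return X + Y;
}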