Index: llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
+++ llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
@@ -207,6 +207,50 @@
   return false;
 }
 
+enum class Reorderability {
+  Always,
+  IfNoAlias,
+  Never
+};
+
+/// This does one-way checks to see if Use could theoretically be hoisted above
+/// MayClobber. This will not check the other way around.
+///
+/// This assumes that, for the purposes of MemorySSA, Use comes directly after
+/// MayClobber, with no potentially clobbering operations in between them.
+/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
+static Reorderability getLoadReorderability(const LoadInst *Use,
+                                            const LoadInst *MayClobber) {
+  bool VolatileUse = Use->isVolatile();
+  bool VolatileClobber = MayClobber->isVolatile();
+  // Volatile operations may never be reordered with other volatile operations.
+  if (VolatileUse && VolatileClobber)
+    return Reorderability::Never;
+
+  // The lang ref allows reordering of volatile and non-volatile operations.
+  // Whether an aliasing nonvolatile load and volatile load can be reordered,
+  // though, is ambiguous. Because it may not be best to exploit this ambiguity,
+  // we only allow volatile/non-volatile reordering if the volatile and
+  // non-volatile operations don't alias.
+  Reorderability Result = VolatileUse || VolatileClobber
+                              ? Reorderability::IfNoAlias
+                              : Reorderability::Always;
+
+  // If a load is seq_cst, it cannot be moved above other loads. If its ordering
+  // is weaker, it can be moved above other loads. We just need to be sure that
+  // MayClobber isn't an acquire load, because loads can't be moved above
+  // acquire loads.
+  //
+  // Note that this explicitly *does* allow the free reordering of monotonic (or
+  // weaker) loads of the same address.
+  bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+  bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
+                                                     AtomicOrdering::Acquire);
+  if (SeqCstUse || MayClobberIsAcquire)
+    return Reorderability::Never;
+  return Result;
+}
+
 static bool instructionClobbersQuery(MemoryDef *MD,
                                      const MemoryLocation &UseLoc,
                                      const Instruction *UseInst,
@@ -234,6 +278,20 @@
     ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
     return I != MRI_NoModRef;
   }
+
+  if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
+    if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
+      switch (getLoadReorderability(UseLoad, DefLoad)) {
+      case Reorderability::Always:
+        return false;
+      case Reorderability::Never:
+        return true;
+      case Reorderability::IfNoAlias:
+        return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
+      }
+    }
+  }
+
   return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
 }
 
Index: llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll
===================================================================
--- llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll
+++ llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll
@@ -3,6 +3,7 @@
 ;
 ; Ensures that atomic loads count as MemoryDefs
 
+; CHECK-LABEL: define i32 @foo
 define i32 @foo(i32* %a, i32* %b) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 4
@@ -16,3 +17,103 @@
   %3 = add i32 %1, %2
   ret i32 %3
 }
+
+; CHECK-LABEL: define void @bar
+define void @bar(i32* %a) {
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: load atomic i32, i32* %a unordered, align 4
+  load atomic i32, i32* %a unordered, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: load atomic i32, i32* %a monotonic, align 4
+  load atomic i32, i32* %a monotonic, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: load atomic i32, i32* %a acquire, align 4
+  load atomic i32, i32* %a acquire, align 4
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: load atomic i32, i32* %a seq_cst, align 4
+  load atomic i32, i32* %a seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: define void @baz
+define void @baz(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %1 = load atomic i32
+  %1 = load atomic i32, i32* %a acquire, align 4
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %2 = load atomic i32, i32* %a unordered, align 4
+  %2 = load atomic i32, i32* %a unordered, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %3 = load atomic i32, i32* %a monotonic, align 4
+  %3 = load atomic i32, i32* %a monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: define void @fences
+define void @fences(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: fence acquire
+  fence acquire
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %1 = load i32, i32* %a
+  %1 = load i32, i32* %a
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: fence release
+  fence release
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32, i32* %a
+  %2 = load i32, i32* %a
+
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: fence acq_rel
+  fence acq_rel
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %3 = load i32, i32* %a
+  %3 = load i32, i32* %a
+
+; CHECK: 4 = MemoryDef(3)
+; CHECK-NEXT: fence seq_cst
+  fence seq_cst
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %4 = load i32, i32* %a
+  %4 = load i32, i32* %a
+  ret void
+}
+
+; CHECK-LABEL: define void @seq_cst_clobber
+define void @seq_cst_clobber(i32* noalias %a, i32* noalias %b) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %1 = load atomic i32, i32* %a monotonic, align 4
+  load atomic i32, i32* %a monotonic, align 4
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %2 = load atomic i32, i32* %a seq_cst, align 4
+  load atomic i32, i32* %a seq_cst, align 4
+
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: load atomic i32, i32* %a monotonic, align 4
+  load atomic i32, i32* %a monotonic, align 4
+
+  ret void
+}
+
+; Ensure that AA hands us MRI_Mod on unreorderable atomic ops.
+;
+; This test is a bit implementation-specific. In particular, it depends on
+; the fact that we pass cmpxchg-load queries to AA, without trying to reason
+; about them on our own.
+;
+; If AA gets more aggressive, we can find another way.
+;
+; CHECK-LABEL: define void @check_aa_is_sane
+define void @check_aa_is_sane(i32* noalias %a, i32* noalias %b) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: cmpxchg i32* %a, i32 0, i32 1 acquire acquire
+  cmpxchg i32* %a, i32 0, i32 1 acquire acquire
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i32, i32* %b, align 4
+  load i32, i32* %b, align 4
+
+  ret void
+}
Index: llvm/trunk/test/Transforms/Util/MemorySSA/constant-memory.ll
===================================================================
--- llvm/trunk/test/Transforms/Util/MemorySSA/constant-memory.ll
+++ llvm/trunk/test/Transforms/Util/MemorySSA/constant-memory.ll
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+;
+; Things that BasicAA can prove point to constant memory should be
+; liveOnEntry, as well.
+
+declare void @clobberAllTheThings()
+
+@str = private unnamed_addr constant [2 x i8] c"hi"
+
+define i8 @foo() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: call void @clobberAllTheThings()
+  call void @clobberAllTheThings()
+  %1 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 0
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %2 = load i8
+  %2 = load i8, i8* %1, align 1
+  %3 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 1
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %4 = load i8
+  %4 = load i8, i8* %3, align 1
+  %5 = add i8 %2, %4
+  ret i8 %5
+}
+
+define i8 @select(i1 %b) {
+  %1 = alloca i8, align 1
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0
+  store i8 0, i8* %1, align 1
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobberAllTheThings()
+  call void @clobberAllTheThings()
+  %2 = getelementptr [2 x i8], [2 x i8]* @str, i64 0, i64 0
+  %3 = select i1 %b, i8* %2, i8* %1
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %4 = load i8
+  %4 = load i8, i8* %3, align 1
+  ret i8 %4
+}
Index: llvm/trunk/test/Transforms/Util/MemorySSA/invariant-groups.ll
===================================================================
--- llvm/trunk/test/Transforms/Util/MemorySSA/invariant-groups.ll
+++ llvm/trunk/test/Transforms/Util/MemorySSA/invariant-groups.ll
@@ -0,0 +1,30 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+;
+; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
+; invariant.group.barrier intrinsics entirely. We'll need to pay attention to
+; them when/if we decide to support invariant groups.
+
+@g = external global i32
+
+define i32 @foo(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* %a, align 4, !llvm.invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 1
+  store i32 1, i32* @g, align 4
+
+  %1 = bitcast i32* %a to i8*
+  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+  %a32 = bitcast i8* %a8 to i32*
+
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+  %2 = load i32, i32* %a32, align 4, !llvm.invariant.group !0
+  ret i32 %2
+}
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"group1"}
Index: llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll
===================================================================
--- llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll
+++ llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll
@@ -3,6 +3,7 @@
 ;
 ; Ensures that volatile stores/loads count as MemoryDefs
 
+; CHECK-LABEL: define i32 @foo
 define i32 @foo() {
   %1 = alloca i32, align 4
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -20,3 +21,74 @@
   %4 = add i32 %3, %2
   ret i32 %4
 }
+
+; Ensuring that we don't automatically hoist nonvolatile loads around volatile
+; loads
+; CHECK-LABEL: define void @volatile_only
+define void @volatile_only(i32* %arg1, i32* %arg2) {
+  ; Trivially NoAlias/MustAlias
+  %a = alloca i32
+  %b = alloca i32
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: load volatile i32, i32* %a
+  load volatile i32, i32* %a
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: load i32, i32* %b
+  load i32, i32* %b
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i32, i32* %a
+  load i32, i32* %a
+
+  ; MayAlias
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: load volatile i32, i32* %arg1
+  load volatile i32, i32* %arg1
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: load i32, i32* %arg2
+  load i32, i32* %arg2
+
+  ret void
+}
+
+; Ensuring that volatile atomic operations work properly.
+; CHECK-LABEL: define void @volatile_atomics
+define void @volatile_atomics(i32* %arg1, i32* %arg2) {
+  %a = alloca i32
+  %b = alloca i32
+
+  ; Trivially NoAlias/MustAlias
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: load atomic volatile i32, i32* %a acquire, align 4
+  load atomic volatile i32, i32* %a acquire, align 4
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i32, i32* %b
+  load i32, i32* %b
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: load atomic volatile i32, i32* %a monotonic, align 4
+  load atomic volatile i32, i32* %a monotonic, align 4
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i32, i32* %b
+  load i32, i32* %b
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load atomic i32, i32* %b unordered, align 4
+  load atomic i32, i32* %b unordered, align 4
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: load atomic i32, i32* %a unordered, align 4
+  load atomic i32, i32* %a unordered, align 4
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: load i32, i32* %a
+  load i32, i32* %a
+
+  ; MayAlias
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: load atomic volatile i32, i32* %arg1 monotonic, align 4
+  load atomic volatile i32, i32* %arg1 monotonic, align 4
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: load i32, i32* %arg2
+  load i32, i32* %arg2
+
+  ret void
+}
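For illustration, a minimal sketch of how a client might observe the behavior exercised by these tests, assuming the MemorySSA API at this revision (MemorySSA::getWalker(), MemorySSAWalker::getClobberingMemoryAccess(), MemorySSA::isLiveOnEntryDef(), and the header still living at llvm/Transforms/Utils/MemorySSA.h, as this patch's paths suggest). The helper name printLoadClobbers and its setup are hypothetical and not part of the patch; with the change above, the walker should report liveOnEntry for the nonvolatile load of %b in @volatile_only, matching the CHECK lines, because the preceding volatile load is of non-aliasing memory.

// Sketch only: print the clobbering MemoryAccess of every load in F.
// Assumes MSSA has already been built for F (e.g. by MemorySSAWrapperPass).
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/MemorySSA.h"

using namespace llvm;

static void printLoadClobbers(Function &F, MemorySSA &MSSA) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  for (BasicBlock &BB : F)
    for (Instruction &I : BB) {
      if (!isa<LoadInst>(&I))
        continue;
      // Ask the walker for the nearest access that may clobber this load.
      MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(&I);
      errs() << I << "\n  clobbered by: ";
      if (MSSA.isLiveOnEntryDef(Clobber))
        errs() << "liveOnEntry\n";
      else
        errs() << *Clobber << "\n";
    }
}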