Index: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp
@@ -1219,6 +1219,7 @@
   assert((DepInfo.isDef() || DepInfo.isClobber()) &&
          "expected a local dependence");
+  assert(LI->isUnordered() && "rules below are incorrect for ordered access");
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
 
@@ -1227,7 +1228,8 @@
   // read by the load, we can extract the bits we need for the load from the
   // stored value.
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
-    if (Address) {
+    // Can't forward from non-atomic to atomic without violating memory model.
+    if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
       int Offset =
           AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
       if (Offset != -1) {
@@ -1244,7 +1246,8 @@
   if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
     // If this is a clobber and L is the first instruction in its block, then
     // we have the first instruction in the entry block.
-    if (DepLI != LI && Address) {
+    // Can't forward from non-atomic to atomic without violating memory model.
+    if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
       int Offset =
           AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
 
@@ -1258,7 +1261,7 @@
   // If the clobbering value is a memset/memcpy/memmove, see if we can
   // forward a value on from it.
   if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
-    if (Address) {
+    if (Address && !LI->isAtomic()) {
       int Offset =
           AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, DepMI, DL);
       if (Offset != -1) {
@@ -1304,6 +1307,10 @@
                                          LI->getType(), DL))
       return false;
 
+    // Can't forward from non-atomic to atomic without violating memory model.
+    if (S->isAtomic() < LI->isAtomic())
+      return false;
+
     Res = AvailableValue::get(S->getValueOperand());
     return true;
   }
@@ -1316,6 +1323,10 @@
         !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
       return false;
 
+    // Can't forward from non-atomic to atomic without violating memory model.
+    if (LD->isAtomic() < LI->isAtomic())
+      return false;
+
     Res = AvailableValue::getLoad(LD);
     return true;
   }
@@ -1587,6 +1598,11 @@
   if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
     return false;
 
+  // This code hasn't been audited for atomic, ordered, or volatile memory
+  // access.
+  if (!LI->isSimple())
+    return false;
+
   // Step 1: Find the non-local dependencies of the load.
   LoadDepVect Deps;
   MD->getNonLocalPointerDependency(LI, Deps);
 
@@ -1755,7 +1771,8 @@
   if (!MD)
     return false;
 
-  if (!L->isSimple())
+  // This code hasn't been audited for ordered or volatile memory access.
+  if (!L->isUnordered())
     return false;
 
   if (L->use_empty()) {
Index: llvm/trunk/test/Transforms/GVN/atomic.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/atomic.ll
+++ llvm/trunk/test/Transforms/GVN/atomic.ll
@@ -107,3 +107,233 @@
   ; CHECK: load i32, i32* @y, align 4
   ret i32 %load
 }
+
+; CHECK-LABEL: @test12(
+; Can't remove a load over an ordering barrier
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+  %load0 = load i32, i32* %P1
+  %1 = load atomic i32, i32* %P2 seq_cst, align 4
+  %load1 = load i32, i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i32* %P1) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load i32, i32* %P1
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test13b(
+define i32 @test13b(i32* %P1) {
+  store atomic i32 0, i32* %P1 unordered, align 4
+  %b = load i32, i32* %P1
+  ret i32 %b
+  ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i32* %P1) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1 seq_cst
+  ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to anything stronger
+; than unordered
+define i32 @test15(i32* %P1, i32* %P2) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load atomic i32, i32* %P1 seq_cst, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i32* %P1, i32* %P2) {
+  %a = load i32, i32* %P1, align 4
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16b(
+define i32 @test16b(i32* %P1) {
+  store i32 0, i32* %P1
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  ret i32 %b
+  ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  store atomic i32 0, i32* %P2 seq_cst, align 4
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  fence seq_cst
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't DSE across a full singlethread fence
+define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_st(
+; CHECK: store
+; CHECK: fence singlethread seq_cst
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  fence singlethread seq_cst
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  call void asm sideeffect "", ""()
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_memory(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  call void asm "", "~{memory}"()
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i32* %P1, i32* %P2) {
+  %a = load i32, i32* %P1, align 4
+  %b = load volatile i32, i32* %P1, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK-LABEL: @volatile_load(
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
+  %a = load volatile i32, i32* %P1, align 4
+  %b = load volatile i32, i32* %P1, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK-LABEL: @redundant_volatile_load(
+  ; CHECK: load volatile i32, i32* %P1
+  ; CHECK: load volatile i32, i32* %P1
+  ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store(
+; CHECK: store volatile
+; CHECK: store
+  store volatile i32 0, i32* %P1, align 4
+  store i32 3, i32* %P1, align 4
+  ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK: store volatile
+; CHECK: store volatile
+  store volatile i32 0, i32* %P1, align 4
+  store volatile i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i32* %P1, i32* %P2) {
+  %a = load volatile i32, i32* %P1, align 4
+  %b = load i32, i32* %P1, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK-LABEL: @test20(
+  ; CHECK: load volatile i32, i32* %P1
+  ; CHECK: ret i32 0
+}
+
+; We're currently conservative about widening
+define i64 @widen1(i32* %P1) {
+  ; CHECK-LABEL: @widen1(
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: load atomic i64, i64* %p2
+  %p2 = bitcast i32* %P1 to i64*
+  %a = load atomic i32, i32* %P1 unordered, align 4
+  %b = load atomic i64, i64* %p2 unordered, align 4
+  %a64 = sext i32 %a to i64
+  %res = sub i64 %a64, %b
+  ret i64 %res
+}
+
+; narrowing does work
+define i64 @narrow(i32* %P1) {
+  ; CHECK-LABEL: @narrow(
+  ; CHECK: load atomic i64, i64* %p2
+  ; CHECK-NOT: load atomic i32, i32* %P1
+  %p2 = bitcast i32* %P1 to i64*
+  %a64 = load atomic i64, i64* %p2 unordered, align 4
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  %b64 = sext i32 %b to i64
+  %res = sub i64 %a64, %b64
+  ret i64 %res
+}
+
+; Missed optimization: we don't yet optimize ordered loads
+define i64 @narrow2(i32* %P1) {
+  ; CHECK-LABEL: @narrow2(
+  ; CHECK: load atomic i64, i64* %p2
+  ; CHECK: load atomic i32, i32* %P1
+  %p2 = bitcast i32* %P1 to i64*
+  %a64 = load atomic i64, i64* %p2 acquire, align 4
+  %b = load atomic i32, i32* %P1 acquire, align 4
+  %b64 = sext i32 %b to i64
+  %res = sub i64 %a64, %b64
+  ret i64 %res
+}
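
As an illustration (not part of the patch): every check added above encodes the same one-directional rule, namely that a value may be forwarded to a load only when the defining access is at least as atomic as the consuming load, so non-atomic-to-atomic forwarding is rejected while the reverse direction stays legal. Below is a minimal standalone C++ sketch of that comparison using plain bools rather than LLVM's LoadInst/StoreInst API; the helper name mayForwardValue is hypothetical and only mirrors the patch's `LI->isAtomic() <= DepSI->isAtomic()` test.

#include <cassert>

// Hypothetical helper (illustration only): returns true when the new GVN
// checks would permit forwarding from the defining access to the load.
// false = non-atomic access, true = (unordered) atomic access. Forwarding
// is allowed only when the definition is at least as atomic as the load,
// which is exactly the ">=" (or mirrored "<=") comparison in the patch.
static bool mayForwardValue(bool DefIsAtomic, bool LoadIsAtomic) {
  return DefIsAtomic >= LoadIsAtomic;
}

int main() {
  assert(mayForwardValue(false, false));  // plain store  -> plain load: ok
  assert(mayForwardValue(true, false));   // atomic store -> plain load: ok
  assert(mayForwardValue(true, true));    // atomic store -> unordered load: ok
  assert(!mayForwardValue(false, true));  // plain store  -> atomic load: rejected
  return 0;
}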