Index: llvm/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Local.cpp
+++ llvm/lib/Transforms/Utils/Local.cpp
@@ -442,6 +442,16 @@
   if (!I->willReturn())
     return false;
 
+  // A non-volatile load can be considered trivially dead regardless of its
+  // atomic ordering. Release-acquire synchronization is established only if a
+  // load reads the value written by a store-release operation. Since this
+  // load has no uses, nothing ever inspects the loaded value, so the
+  // optimizer may assume the load read the value present before the
+  // store-release, i.e. that no synchronization happened. This allows us to
+  // simply remove this load.
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return !LI->isVolatile();
+
   if (!I->mayHaveSideEffects())
     return true;
 
Index: llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -323,7 +323,8 @@
 ; CHECK-NEXT:    [[VAL:%.*]] = load atomic i32, i32 addrspace(1)* [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3, !amdgpu.uniform !0
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
-; CHECK-NEXT:    [[I3:%.*]] = add i32 [[I2]], [[I]]
+; CHECK-NEXT:    [[I3_1:%.*]] = add i32 [[I2]], [[I]]
+; CHECK-NEXT:    [[I3:%.*]] = add i32 [[I3_1]], [[VAL]]
 ; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 4
 ; CHECK-NEXT:    store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4
 ; CHECK-NEXT:    ret void
@@ -334,7 +335,8 @@
   %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
   %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
   %i2 = load i32, i32 addrspace(1)* %i1, align 4
-  %i3 = add i32 %i2, %i
+  %i3_1 = add i32 %i2, %i
+  %i3 = add i32 %i3_1, %val
   %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
   store i32 %i3, i32 addrspace(1)* %i4, align 4
   ret void
Index: llvm/test/CodeGen/PowerPC/atomics-constant.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/atomics-constant.ll
+++ llvm/test/CodeGen/PowerPC/atomics-constant.ll
@@ -9,10 +9,8 @@
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    addis 3, 2, a@toc@ha
-; CHECK-NEXT:    ld 3, a@toc@l(3)
-; CHECK-NEXT:    cmpd 7, 4, 4
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmpd 7, 4, 4
 ; CHECK-NEXT:    bne- 7, .+4
 ; CHECK-NEXT:    isync
 ; CHECK-NEXT:    blr
Index: llvm/test/Transforms/EarlyCSE/atomics.ll
===================================================================
--- llvm/test/Transforms/EarlyCSE/atomics.ll
+++ llvm/test/Transforms/EarlyCSE/atomics.ll
@@ -1,20 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+; RUN: opt < %s -S -passes=early-cse | FileCheck %s
 
 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD0]]
+;
+  %load0 = load i32, i32* %P1
+  %1 = load atomic i32, i32* %P2 seq_cst, align 4
+  %load1 = load i32, i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+}
+
+define i32 @test12_2(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test12_2(
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
-; CHECK-NEXT:    ret i32 [[SEL]]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[SEL]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %load0 = load i32, i32* %P1
   %1 = load atomic i32, i32* %P2 seq_cst, align 4
   %load1 = load i32, i32* %P1
   %sel = select i1 %B, i32 %load0, i32 %load1
-  ret i32 %sel
+  %res = add i32 %sel, %1
+  ret i32 %res
 }
 
 ; atomic to non-atomic forwarding is legal
Index: llvm/test/Transforms/EarlyCSE/basic.ll
===================================================================
--- llvm/test/Transforms/EarlyCSE/basic.ll
+++ llvm/test/Transforms/EarlyCSE/basic.ll
@@ -235,21 +235,6 @@
   ret void
 }
 
-define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
-; CHECK-NEXT:    ret i32 [[SEL]]
-;
-  %load0 = load i32, i32* %P1
-  %1 = load atomic i32, i32* %P2 seq_cst, align 4
-  %load1 = load i32, i32* %P1
-  %sel = select i1 %B, i32 %load0, i32 %load1
-  ret i32 %sel
-}
-
 define void @dse1(i32 *%P) {
 ; CHECK-LABEL: @dse1(
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
Index: llvm/test/Transforms/InstCombine/atomic.ll
===================================================================
--- llvm/test/Transforms/InstCombine/atomic.ll
+++ llvm/test/Transforms/InstCombine/atomic.ll
@@ -425,7 +425,6 @@
 
 define i32 @atomic_load_from_constant_global() {
 ; CHECK-LABEL: @atomic_load_from_constant_global(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* @c seq_cst, align 4
 ; CHECK-NEXT:    ret i32 42
 ;
   %v = load atomic i32, i32* @c seq_cst, align 4
@@ -434,7 +433,6 @@
 
 define i8 @atomic_load_from_constant_global_bitcast() {
 ; CHECK-LABEL: @atomic_load_from_constant_global_bitcast(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
 ; CHECK-NEXT:    ret i8 42
 ;
   %v = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
Index: llvm/test/Transforms/InstCombine/store.ll
===================================================================
--- llvm/test/Transforms/InstCombine/store.ll
+++ llvm/test/Transforms/InstCombine/store.ll
@@ -306,7 +306,6 @@
 
 define void @write_back6(i32* %p) {
 ; CHECK-LABEL: @write_back6(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
 ; CHECK-NEXT:    ret void
 ;
   %v = load atomic i32, i32* %p seq_cst, align 4