Index: llvm/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Local.cpp
+++ llvm/lib/Transforms/Utils/Local.cpp
@@ -442,6 +442,16 @@
   if (!I->willReturn())
     return false;
 
+  // A non-volatile load can be considered trivially dead regardless of its
+  // atomic ordering. Release-acquire synchronization happens only if the load
+  // reads the value written by a store-release operation. Since the load has
+  // no uses, nobody can observe which value was actually read, so the
+  // optimizer may assume that the load read the value from before the
+  // store-release happened and therefore no synchronization took place.
+  // This allows us to simply remove the load.
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return !LI->isVolatile();
+
   if (!I->mayHaveSideEffects())
     return true;
 
Index: llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -323,7 +323,8 @@
 ; CHECK-NEXT:    [[VAL:%.*]] = load atomic i32, i32 addrspace(1)* [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3, !amdgpu.uniform !0
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
-; CHECK-NEXT:    [[I3:%.*]] = add i32 [[I2]], [[I]]
+; CHECK-NEXT:    [[I3_1:%.*]] = add i32 [[I2]], [[I]]
+; CHECK-NEXT:    [[I3:%.*]] = add i32 [[I3_1]], [[VAL]]
 ; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 4
 ; CHECK-NEXT:    store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4
 ; CHECK-NEXT:    ret void
@@ -334,7 +335,8 @@
   %val = load atomic i32, i32 addrspace(1)* %gep  seq_cst, align 4
   %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
   %i2 = load i32, i32 addrspace(1)* %i1, align 4
-  %i3 = add i32 %i2, %i
+  %i3_1 = add i32 %i2, %i
+  %i3 = add i32 %i3_1, %val
   %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
   store i32 %i3, i32 addrspace(1)* %i4, align 4
   ret void
Index: llvm/test/CodeGen/PowerPC/atomics-constant.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/atomics-constant.ll
+++ llvm/test/CodeGen/PowerPC/atomics-constant.ll
@@ -9,10 +9,8 @@
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    addis 3, 2, a@toc@ha
-; CHECK-NEXT:    ld 3, a@toc@l(3)
-; CHECK-NEXT:    cmpd 7, 4, 4
 ; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    cmpd 7, 4, 4
 ; CHECK-NEXT:    bne- 7, .+4
 ; CHECK-NEXT:    isync
 ; CHECK-NEXT:    blr
Index: llvm/test/Transforms/EarlyCSE/atomics.ll
===================================================================
--- llvm/test/Transforms/EarlyCSE/atomics.ll
+++ llvm/test/Transforms/EarlyCSE/atomics.ll
@@ -1,20 +1,35 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
+; RUN: opt < %s -S -passes=early-cse | FileCheck %s
 
 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD0]]
+;
+  %load0 = load i32, i32* %P1
+  %1 = load atomic i32, i32* %P2 seq_cst, align 4
+  %load1 = load i32, i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+}
+
+define i32 @test12_2(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test12_2(
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
-; CHECK-NEXT:    ret i32 [[SEL]]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[SEL]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %load0 = load i32, i32* %P1
   %1 = load atomic i32, i32* %P2 seq_cst, align 4
   %load1 = load i32, i32* %P1
   %sel = select i1 %B, i32 %load0, i32 %load1
-  ret i32 %sel
+  %res = add i32 %sel, %1
+  ret i32 %res
 }
 
 ; atomic to non-atomic forwarding is legal
Index: llvm/test/Transforms/EarlyCSE/basic.ll
===================================================================
--- llvm/test/Transforms/EarlyCSE/basic.ll
+++ llvm/test/Transforms/EarlyCSE/basic.ll
@@ -235,21 +235,6 @@
   ret void
 }
 
-define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
-; CHECK-NEXT:    ret i32 [[SEL]]
-;
-  %load0 = load i32, i32* %P1
-  %1 = load atomic i32, i32* %P2 seq_cst, align 4
-  %load1 = load i32, i32* %P1
-  %sel = select i1 %B, i32 %load0, i32 %load1
-  ret i32 %sel
-}
-
 define void @dse1(i32 *%P) {
 ; CHECK-LABEL: @dse1(
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
Index: llvm/test/Transforms/InstCombine/atomic.ll
===================================================================
--- llvm/test/Transforms/InstCombine/atomic.ll
+++ llvm/test/Transforms/InstCombine/atomic.ll
@@ -425,7 +425,6 @@
 
 define i32 @atomic_load_from_constant_global() {
 ; CHECK-LABEL: @atomic_load_from_constant_global(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* @c seq_cst, align 4
 ; CHECK-NEXT:    ret i32 42
 ;
   %v = load atomic i32, i32* @c seq_cst, align 4
@@ -434,7 +433,6 @@
 
 define i8 @atomic_load_from_constant_global_bitcast() {
 ; CHECK-LABEL: @atomic_load_from_constant_global_bitcast(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
 ; CHECK-NEXT:    ret i8 42
 ;
   %v = load atomic i8, i8* bitcast (i32* @c to i8*) seq_cst, align 1
Index: llvm/test/Transforms/InstCombine/store.ll
===================================================================
--- llvm/test/Transforms/InstCombine/store.ll
+++ llvm/test/Transforms/InstCombine/store.ll
@@ -306,7 +306,6 @@
 
 define void @write_back6(i32* %p) {
 ; CHECK-LABEL: @write_back6(
-; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
 ; CHECK-NEXT:    ret void
 ;
   %v = load atomic i32, i32* %p seq_cst, align 4