diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -888,9 +888,6 @@
   // A call to propagate attributes pass in the backend in case opt was not run.
   addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
 
-  addPass(createAtomicExpandPass());
-
-
   addPass(createAMDGPULowerIntrinsicsPass());
 
   // Function calls are not supported, so make sure we inline everything.
@@ -921,8 +918,12 @@
     addPass(createAMDGPULowerModuleLDSPass());
   }
 
-  if (TM.getOptLevel() > CodeGenOpt::None) {
+  if (TM.getOptLevel() > CodeGenOpt::None)
     addPass(createInferAddressSpacesPass());
+
+  addPass(createAtomicExpandPass());
+
+  if (TM.getOptLevel() > CodeGenOpt::None) {
     addPass(createAMDGPUPromoteAlloca());
 
     if (EnableSROA)
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
--- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
@@ -724,6 +724,81 @@
   ret void
 }
 
+define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0 {
+; GFX900-LABEL: infer_as_before_atomic:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX900-NEXT:    s_mov_b64 s[2:3], 0
+; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX900-NEXT:    v_mov_b32_e32 v1, s4
+; GFX900-NEXT:  BB9_1: ; %atomicrmw.start
+; GFX900-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX900-NEXT:    v_add_f32_e32 v0, 1.0, v1
+; GFX900-NEXT:    v_mov_b32_e32 v2, 0
+; GFX900-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
+; GFX900-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v1
+; GFX900-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX900-NEXT:    v_mov_b32_e32 v1, v0
+; GFX900-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX900-NEXT:    s_cbranch_execnz BB9_1
+; GFX900-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX900-NEXT:    s_endpgm
+;
+; GFX908-LABEL: infer_as_before_atomic:
+; GFX908:       ; %bb.0:
+; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX908-NEXT:    v_mov_b32_e32 v0, 0
+; GFX908-NEXT:    v_mov_b32_e32 v1, 1.0
+; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX908-NEXT:    global_atomic_add_f32 v0, v1, s[0:1]
+; GFX908-NEXT:    s_endpgm
+;
+; GFX90A-LABEL: infer_as_before_atomic:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v1, 1.0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_add_f32 v0, v1, s[0:1]
+; GFX90A-NEXT:    s_endpgm
+;
+; GFX10-LABEL: infer_as_before_atomic:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    s_mov_b32 s2, 0
+; GFX10-NEXT:  BB9_1: ; %atomicrmw.start
+; GFX10-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT:    v_add_f32_e32 v0, 1.0, v1
+; GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-NEXT:    global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v0, v1
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0
+; GFX10-NEXT:    s_or_b32 s2, vcc_lo, s2
+; GFX10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s2
+; GFX10-NEXT:    s_cbranch_execnz BB9_1
+; GFX10-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX10-NEXT:    s_endpgm
+  %load = load float*, float* addrspace(4)* %arg
+  %v = atomicrmw fadd float* %load, float 1.0 syncscope("agent-one-as") monotonic, align 4
+  ret void
+}
+
 attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
 attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" }
 attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -34,7 +34,6 @@
 ; GCN-O0-NEXT: Fix function bitcasts for AMDGPU
 ; GCN-O0-NEXT: FunctionPass Manager
 ; GCN-O0-NEXT: Early propagate attributes from kernels to functions
-; GCN-O0-NEXT: Expand Atomic instructions
 ; GCN-O0-NEXT: AMDGPU Lower Intrinsics
 ; GCN-O0-NEXT: AMDGPU Inline All Functions
 ; GCN-O0-NEXT: CallGraph Construction
@@ -44,6 +43,7 @@
 ; GCN-O0-NEXT: Lower OpenCL enqueued blocks
 ; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions
 ; GCN-O0-NEXT: FunctionPass Manager
+; GCN-O0-NEXT: Expand Atomic instructions
 ; GCN-O0-NEXT: Lower Garbage Collection Instructions
 ; GCN-O0-NEXT: Shadow Stack GC Lowering
 ; GCN-O0-NEXT: Lower constant intrinsics
@@ -177,7 +177,6 @@
 ; GCN-O1-NEXT: Fix function bitcasts for AMDGPU
 ; GCN-O1-NEXT: FunctionPass Manager
 ; GCN-O1-NEXT: Early propagate attributes from kernels to functions
-; GCN-O1-NEXT: Expand Atomic instructions
 ; GCN-O1-NEXT: AMDGPU Lower Intrinsics
 ; GCN-O1-NEXT: AMDGPU Inline All Functions
 ; GCN-O1-NEXT: CallGraph Construction
@@ -188,6 +187,7 @@
 ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
 ; GCN-O1-NEXT: FunctionPass Manager
 ; GCN-O1-NEXT: Infer address spaces
+; GCN-O1-NEXT: Expand Atomic instructions
 ; GCN-O1-NEXT: AMDGPU Promote Alloca
 ; GCN-O1-NEXT: Dominator Tree Construction
 ; GCN-O1-NEXT: SROA
@@ -425,7 +425,6 @@
 ; GCN-O1-OPTS-NEXT: Fix function bitcasts for AMDGPU
 ; GCN-O1-OPTS-NEXT: FunctionPass Manager
 ; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions
-; GCN-O1-OPTS-NEXT: Expand Atomic instructions
 ; GCN-O1-OPTS-NEXT: AMDGPU Lower Intrinsics
 ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
 ; GCN-O1-OPTS-NEXT: CallGraph Construction
@@ -436,6 +435,7 @@
 ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
 ; GCN-O1-OPTS-NEXT: FunctionPass Manager
 ; GCN-O1-OPTS-NEXT: Infer address spaces
+; GCN-O1-OPTS-NEXT: Expand Atomic instructions
 ; GCN-O1-OPTS-NEXT: AMDGPU Promote Alloca
 ; GCN-O1-OPTS-NEXT: Dominator Tree Construction
 ; GCN-O1-OPTS-NEXT: SROA
@@ -706,7 +706,6 @@
 ; GCN-O2-NEXT: Fix function bitcasts for AMDGPU
 ; GCN-O2-NEXT: FunctionPass Manager
 ; GCN-O2-NEXT: Early propagate attributes from kernels to functions
-; GCN-O2-NEXT: Expand Atomic instructions
 ; GCN-O2-NEXT: AMDGPU Lower Intrinsics
 ; GCN-O2-NEXT: AMDGPU Inline All Functions
 ; GCN-O2-NEXT: CallGraph Construction
@@ -717,6 +716,7 @@
 ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
 ; GCN-O2-NEXT: FunctionPass Manager
 ; GCN-O2-NEXT: Infer address spaces
+; GCN-O2-NEXT: Expand Atomic instructions
 ; GCN-O2-NEXT: AMDGPU Promote Alloca
 ; GCN-O2-NEXT: Dominator Tree Construction
 ; GCN-O2-NEXT: SROA
@@ -988,7 +988,6 @@
 ; GCN-O3-NEXT: Fix function bitcasts for AMDGPU
 ; GCN-O3-NEXT: FunctionPass Manager
 ; GCN-O3-NEXT: Early propagate attributes from kernels to functions
-; GCN-O3-NEXT: Expand Atomic instructions
 ; GCN-O3-NEXT: AMDGPU Lower Intrinsics
 ; GCN-O3-NEXT: AMDGPU Inline All Functions
 ; GCN-O3-NEXT: CallGraph Construction
@@ -999,6 +998,7 @@
 ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
 ; GCN-O3-NEXT: FunctionPass Manager
 ; GCN-O3-NEXT: Infer address spaces
+; GCN-O3-NEXT: Expand Atomic instructions
 ; GCN-O3-NEXT: AMDGPU Promote Alloca
 ; GCN-O3-NEXT: Dominator Tree Construction
 ; GCN-O3-NEXT: SROA
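
A minimal LLVM IR sketch of what the new infer_as_before_atomic test exercises (not part of the patch; the second function and its ".inferred" name are only illustrative, assuming the usual InferAddressSpaces rewrite through the target's assumed-address-space hook): once "Infer address spaces" runs before "Expand Atomic instructions", the flat pointer loaded from the constant (addrspace(4)) kernel argument is assumed to point to global memory, so the atomicrmw is rewritten to an addrspace(1) access that gfx908/gfx90a can lower to global_atomic_add_f32 instead of the cmpxchg loop seen in the gfx900/gfx10 checks.

; Input, as written in the test: the atomicrmw operates on a flat float*.
define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0 {
  %load = load float*, float* addrspace(4)* %arg
  %v = atomicrmw fadd float* %load, float 1.0 syncscope("agent-one-as") monotonic, align 4
  ret void
}

; Roughly what reaches AtomicExpand and instruction selection after
; InferAddressSpaces: the loaded pointer is assumed to be global, so the
; read-modify-write happens on an addrspace(1) pointer (illustrative sketch).
define amdgpu_kernel void @infer_as_before_atomic.inferred(float* addrspace(4)* %arg) #0 {
  %load = load float*, float* addrspace(4)* %arg
  %global = addrspacecast float* %load to float addrspace(1)*
  %v = atomicrmw fadd float addrspace(1)* %global, float 1.0 syncscope("agent-one-as") monotonic, align 4
  ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }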