Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -553,12 +553,9 @@
 }
 
 // CHECK-LABEL: @test_get_grid_size(
+// CHECK: phi i64 [ 20, %sw.bb2 ], [ 16, %sw.bb1 ], [ 12, %entry ]
 // CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 12
-// CHECK: load i32, i32 addrspace(4)* %{{.*}}, align 4, !invariant.load
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 16
-// CHECK: load i32, i32 addrspace(4)* %{{.*}}, align 4, !invariant.load
-// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 20
+// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i64 %{{.*}}
 // CHECK: load i32, i32 addrspace(4)* %{{.*}}, align 4, !invariant.load
 void test_get_grid_size(int d, global int *out)
 {
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -1293,15 +1293,21 @@
 to ensure a pointer is not null or otherwise the behavior is undefined.
 
 ``dereferenceable(<n>)``
-    This indicates that the parameter or return pointer is dereferenceable. This
-    attribute may only be applied to pointer typed parameters. A pointer that
-    is dereferenceable can be loaded from speculatively without a risk of
-    trapping. The number of bytes known to be dereferenceable must be provided
-    in parentheses. It is legal for the number of bytes to be less than the
-    size of the pointee type. The ``nonnull`` attribute does not imply
-    dereferenceability (consider a pointer to one element past the end of an
-    array), however ``dereferenceable(<n>)`` does imply ``nonnull`` in
-    ``addrspace(0)`` (which is the default address space), except if the
+    This indicates that the parameter or return pointer is dereferenceable at
+    the instant of the call. This attribute may only be applied to pointer
+    typed parameters. The number of bytes known to be dereferenceable must
+    be provided in parentheses. It is legal for the number of bytes to be
+    less than the size of the pointee type.
+
+    A pointer that is dereferenceable at a particular location in the program
+    can be loaded from speculatively without a risk of trapping at that
+    location. In general, once a memory location becomes dereferenceable, it
+    will remain dereferenceable until the underlying object is freed.
+
+    The ``nonnull`` attribute does not imply dereferenceability (consider a
+    pointer to one element past the end of an array); however,
+    ``dereferenceable(<n>)`` does imply ``nonnull`` in ``addrspace(0)``
+    (which is the default address space), except if the
     ``null_pointer_is_valid`` function attribute is present.
     ``n`` should be a positive number. The pointer should be well defined,
     otherwise it is undefined behavior. This means ``dereferenceable(<n>)``
@@ -6294,10 +6300,10 @@
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The existence of the ``!dereferenceable`` metadata on the instruction
-tells the optimizer that the value loaded is known to be dereferenceable.
-The number of bytes known to be dereferenceable is specified by the integer
-value in the metadata node. This is analogous to the ''dereferenceable''
-attribute on parameters and return values.
+tells the optimizer that the value loaded is known to be dereferenceable at
+that program location. The number of bytes known to be dereferenceable is
+specified by the integer value in the metadata node. This is analogous to the
+``dereferenceable`` attribute on parameters and return values.
 
 .. _md_dereferenceable_or_null:
 
@@ -6306,7 +6312,7 @@
 
 The existence of the ``!dereferenceable_or_null`` metadata on the
 instruction tells the optimizer that the value loaded is known to be either
-dereferenceable or null.
+dereferenceable or null at that program location.
 The number of bytes known to be dereferenceable is specified by the integer
 value in the metadata node. This is analogous to the ''dereferenceable_or_null''
 attribute on parameters and return values.
Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -208,6 +208,7 @@
   uint64_t DerefBytes =
       V.getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
   DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes;
+  // TODO: We may be able to do better for a potentially freed pointer here.
   DerefBytes = CanBeFreed ? 0 : DerefBytes;
   // If queried with a precise location size, we assume that location size to be
   // accessed, thus valid.
Index: llvm/lib/IR/Value.cpp
===================================================================
--- llvm/lib/IR/Value.cpp
+++ llvm/lib/IR/Value.cpp
@@ -39,7 +39,7 @@
 using namespace llvm;
 
 static cl::opt<bool> UseDerefAtPointSemantics(
-    "use-dereferenceable-at-point-semantics", cl::Hidden, cl::init(false),
+    "use-dereferenceable-at-point-semantics", cl::Hidden, cl::init(true),
     cl::desc("Deref attributes and metadata infer facts at definition only"));
 
 //===----------------------------------------------------------------------===//
Index: llvm/test/Analysis/BasicAA/dereferenceable.ll
===================================================================
--- llvm/test/Analysis/BasicAA/dereferenceable.ll
+++ llvm/test/Analysis/BasicAA/dereferenceable.ll
@@ -63,7 +63,7 @@
 
 define i32 @local_and_deref_ret_2() {
 ; CHECK: Function: local_and_deref_ret_2: 2 pointers, 2 call sites
-; CHECK-NEXT: NoAlias: i32* %obj, i32* %ret
+; CHECK-NEXT: MayAlias: i32* %obj, i32* %ret
 bb:
   %obj = alloca i32
   call void @unknown(i32* %obj)
Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
@@ -4,11 +4,12 @@
 define i32 @load_invariant(i32* %ptr) {
   ; CHECK-LABEL: name: load_invariant
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, i32* %ptr, align 4, !invariant.load !0
   ret i32 %load
 }
@@ -16,11 +17,12 @@
 define i32 @load_volatile_invariant(i32* %ptr) {
   ; CHECK-LABEL: name: load_volatile_invariant
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load volatile i32, i32* %ptr, align 4, !invariant.load !0
   ret i32 %load
 }
@@ -28,11 +30,12 @@
 define i32 @load_dereferenceable(i32* dereferenceable(4) %ptr) {
   ; CHECK-LABEL: name: load_dereferenceable
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, i32* %ptr, align 4
   ret i32 %load
 }
@@ -40,11 +43,12 @@
 define i32 @load_dereferenceable_invariant(i32* dereferenceable(4) %ptr) {
   ; CHECK-LABEL: name: load_dereferenceable_invariant
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, i32* %ptr, align 4, !invariant.load !0
   ret i32 %load
 }
@@ -52,11 +56,12 @@
 define i32 @load_nontemporal(i32* %ptr) {
   ; CHECK-LABEL: name: load_nontemporal
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, i32* %ptr, align 4, !nontemporal !0
   ret i32 %load
 }
@@ -64,11 +69,12 @@
 define i32 @load_falkor_strided_access(i32* %ptr) {
   ; CHECK-LABEL: name: load_falkor_strided_access
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $x0
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load (s32) from %ir.ptr)
-  ; CHECK: $w0 = COPY [[LOAD]](s32)
-  ; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-NEXT: liveins: $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load (s32) from %ir.ptr)
+  ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
+  ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, i32* %ptr, align 4, !falkor.strided.access !0
   ret i32 %load
 }
Index:
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -84,7 +84,7 @@ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_i32 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2844,10 +2844,10 @@ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -1300,7 +1300,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4516,10 +4516,10 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -4,51 +4,52 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:sreg_64(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) - ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64) 
- ; CHECK: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK: $vgpr31 = COPY [[OR1]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[LOAD]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p0) = G_LOAD [[INT]](p4) :: (invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[LOAD]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, 
implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0 call void %fptr() ret void } @@ -56,18 +57,19 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call amdgpu_gfx void %fptr() ret void } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -148,7 +148,7 @@ ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4) ; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 Index: llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -4,241 +4,242 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias dereferenceable(18446744073709551615) %arg0, i32 %arg1) { ; GCN-LABEL: name: mmo_offsets0 ; GCN: bb.0.bb.0: - ; GCN: liveins: $sgpr0, $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = 
REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 - ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4) - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 96, align 1, addrspace 4) - ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 - ; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0 - ; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0 - ; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]] - ; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0 - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 
1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = 
S_MOV_B32 80 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) - ; GCN: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]] - ; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0 - ; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 - ; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) - ; GCN: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]] - ; GCN: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0 - ; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 - ; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) - ; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]] - ; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 - ; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]] - ; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 - ; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]] - ; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 
192, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 - ; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 - ; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) - ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN 
[[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 - ; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 - ; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN: [[COPY37:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY37]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 - ; GCN: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) - ; GCN: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]] - ; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0 - ; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 - ; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) - ; GCN: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]] - ; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0 - ; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 - ; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) - ; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]] - ; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0 - ; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]] - ; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0 - ; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, 
implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]] - ; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0 - ; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]] - ; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0 - ; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]] - ; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0 - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 - ; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 - ; GCN: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], 
[[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 - ; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) - ; GCN: S_ENDPGM 0 + ; GCN-NEXT: liveins: $sgpr0, $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (invariant load (s128) from %ir.arg0, addrspace 6) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4) + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 96, align 1, addrspace 4) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]] + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0 + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0 + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]] + ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0 + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]] + ; 
GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0 + ; GCN-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0 + ; GCN-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 + ; GCN-NEXT: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 + ; GCN-NEXT: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]] + ; GCN-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN-NEXT: 
[[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = 
COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN-NEXT: [[COPY37:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY37]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], 
[[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN-NEXT: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0 + ; GCN-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 + ; GCN-NEXT: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN-NEXT: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0 + ; GCN-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 + ; GCN-NEXT: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN-NEXT: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]] + ; GCN-NEXT: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0 + ; GCN-NEXT: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]] + ; GCN-NEXT: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0 + ; GCN-NEXT: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]] + ; GCN-NEXT: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0 + ; GCN-NEXT: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]] + ; GCN-NEXT: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0 + ; GCN-NEXT: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN-NEXT: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]] + ; GCN-NEXT: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0 + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN-NEXT: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 + ; GCN-NEXT: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; GCN-NEXT: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 + ; GCN-NEXT: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 + ; GCN-NEXT: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN-NEXT: 
BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN-NEXT: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0 %buffer0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 16, i1 false, i1 false) #0 Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -10,97 +10,103 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN-LABEL: name: extract_w_offset_vgpr ; GCN: bb.0.entry: - ; GCN: successors: %bb.1(0x80000000) - ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN: renamable $sgpr6 = COPY renamable $sgpr1 - ; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 - ; GCN: renamable $sgpr4 = S_MOV_B32 61440 - ; GCN: renamable $sgpr5 = S_MOV_B32 -1 - ; GCN: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: renamable $sgpr1 = COPY killed renamable $sgpr6 - ; GCN: renamable $sgpr2 = COPY killed renamable $sgpr5 - ; GCN: renamable $sgpr3 = COPY killed renamable $sgpr4 - ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5) - ; GCN: renamable $sgpr0 = S_MOV_B32 16 - ; GCN: renamable $sgpr1 = S_MOV_B32 15 - ; GCN: renamable $sgpr2 = S_MOV_B32 14 - ; GCN: renamable $sgpr3 = S_MOV_B32 13 - ; GCN: renamable $sgpr4 = S_MOV_B32 12 - ; GCN: renamable $sgpr5 = S_MOV_B32 11 - ; GCN: renamable $sgpr6 = S_MOV_B32 10 - ; GCN: renamable $sgpr7 = S_MOV_B32 9 - ; GCN: renamable $sgpr8 = S_MOV_B32 8 - ; GCN: renamable $sgpr9 = S_MOV_B32 7 - ; GCN: renamable $sgpr10 = S_MOV_B32 6 - ; GCN: renamable $sgpr11 = S_MOV_B32 5 - ; GCN: renamable $sgpr12 = S_MOV_B32 3 - ; GCN: renamable $sgpr13 = S_MOV_B32 2 - ; GCN: renamable $sgpr14 = S_MOV_B32 1 - ; GCN: renamable $sgpr15 = S_MOV_B32 0 - ; GCN: renamable $vgpr0 = COPY killed renamable $sgpr15 - ; GCN: renamable $vgpr30 = COPY killed renamable $sgpr14 - ; GCN: renamable $vgpr29 = COPY killed renamable $sgpr13 - ; GCN: renamable 
$vgpr28 = COPY killed renamable $sgpr12 - ; GCN: renamable $vgpr27 = COPY killed renamable $sgpr11 - ; GCN: renamable $vgpr26 = COPY killed renamable $sgpr10 - ; GCN: renamable $vgpr25 = COPY killed renamable $sgpr9 - ; GCN: renamable $vgpr24 = COPY killed renamable $sgpr8 - ; GCN: renamable $vgpr23 = COPY killed renamable $sgpr7 - ; GCN: renamable $vgpr22 = COPY killed renamable $sgpr6 - ; GCN: renamable $vgpr21 = COPY killed renamable $sgpr5 - ; GCN: renamable $vgpr20 = COPY killed renamable $sgpr4 - ; GCN: renamable $vgpr19 = COPY killed renamable $sgpr3 - ; GCN: renamable $vgpr18 = COPY killed renamable $sgpr2 - ; GCN: renamable $vgpr17 = COPY killed renamable $sgpr1 - ; GCN: renamable $vgpr16 = COPY killed renamable $sgpr0 - ; GCN: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GCN: renamable $vgpr1 = COPY killed renamable $vgpr30 - ; GCN: renamable $vgpr2 = COPY killed renamable $vgpr29 - ; GCN: renamable $vgpr3 = COPY killed renamable $vgpr28 - ; GCN: renamable $vgpr4 = COPY killed renamable $vgpr27 - ; GCN: renamable $vgpr5 = COPY killed renamable $vgpr26 - ; GCN: renamable $vgpr6 = COPY killed renamable $vgpr25 - ; GCN: renamable $vgpr7 = COPY killed renamable $vgpr24 - ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr23 - ; GCN: renamable $vgpr9 = COPY killed renamable $vgpr22 - ; GCN: renamable $vgpr10 = COPY killed renamable $vgpr21 - ; GCN: renamable $vgpr11 = COPY killed renamable $vgpr20 - ; GCN: renamable $vgpr12 = COPY killed renamable $vgpr19 - ; GCN: renamable $vgpr13 = COPY killed renamable $vgpr18 - ; GCN: renamable $vgpr14 = COPY killed renamable $vgpr17 - ; GCN: renamable $vgpr15 = COPY killed renamable $vgpr16 - ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) - ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN: renamable $vgpr0 = IMPLICIT_DEF - ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF - ; GCN: bb.1: - ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) - ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) - ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5) - ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec - ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec - ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE $vgpr0, 
%stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) - ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc - ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec - ; GCN: bb.3: - ; GCN: successors: %bb.2(0x80000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 - ; GCN: bb.2: - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1) - ; GCN: S_ENDPGM 0 + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1 + ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 + ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440 + ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 -1 + ; GCN-NEXT: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $sgpr2 = COPY killed renamable $sgpr5 + ; GCN-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr4 + ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 16 + ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 15 + ; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 14 + ; GCN-NEXT: renamable $sgpr3 = S_MOV_B32 13 + ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 12 + ; GCN-NEXT: renamable $sgpr5 = S_MOV_B32 11 + ; GCN-NEXT: renamable $sgpr6 = S_MOV_B32 10 + ; GCN-NEXT: renamable $sgpr7 = S_MOV_B32 9 + ; GCN-NEXT: renamable $sgpr8 = S_MOV_B32 8 + ; GCN-NEXT: renamable $sgpr9 = S_MOV_B32 7 + ; GCN-NEXT: renamable $sgpr10 = S_MOV_B32 6 + ; GCN-NEXT: renamable $sgpr11 = S_MOV_B32 5 + ; GCN-NEXT: renamable $sgpr12 = S_MOV_B32 3 + ; GCN-NEXT: renamable $sgpr13 = S_MOV_B32 2 + ; GCN-NEXT: renamable $sgpr14 = S_MOV_B32 1 + ; GCN-NEXT: renamable $sgpr15 = S_MOV_B32 0 + ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15 + ; GCN-NEXT: renamable $vgpr30 = COPY killed renamable $sgpr14 + ; GCN-NEXT: renamable $vgpr29 = COPY killed renamable $sgpr13 + ; GCN-NEXT: renamable $vgpr28 = COPY killed renamable $sgpr12 + ; GCN-NEXT: renamable $vgpr27 = COPY killed renamable $sgpr11 + ; GCN-NEXT: renamable $vgpr26 = COPY killed renamable $sgpr10 + ; GCN-NEXT: renamable $vgpr25 = COPY killed 
renamable $sgpr9 + ; GCN-NEXT: renamable $vgpr24 = COPY killed renamable $sgpr8 + ; GCN-NEXT: renamable $vgpr23 = COPY killed renamable $sgpr7 + ; GCN-NEXT: renamable $vgpr22 = COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $vgpr21 = COPY killed renamable $sgpr5 + ; GCN-NEXT: renamable $vgpr20 = COPY killed renamable $sgpr4 + ; GCN-NEXT: renamable $vgpr19 = COPY killed renamable $sgpr3 + ; GCN-NEXT: renamable $vgpr18 = COPY killed renamable $sgpr2 + ; GCN-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr1 + ; GCN-NEXT: renamable $vgpr16 = COPY killed renamable $sgpr0 + ; GCN-NEXT: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN-NEXT: renamable $vgpr1 = COPY killed renamable $vgpr30 + ; GCN-NEXT: renamable $vgpr2 = COPY killed renamable $vgpr29 + ; GCN-NEXT: renamable $vgpr3 = COPY killed renamable $vgpr28 + ; GCN-NEXT: renamable $vgpr4 = COPY killed renamable $vgpr27 + ; GCN-NEXT: renamable $vgpr5 = COPY killed renamable $vgpr26 + ; GCN-NEXT: renamable $vgpr6 = COPY killed renamable $vgpr25 + ; GCN-NEXT: renamable $vgpr7 = COPY killed renamable $vgpr24 + ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr23 + ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr22 + ; GCN-NEXT: renamable $vgpr10 = COPY killed renamable $vgpr21 + ; GCN-NEXT: renamable $vgpr11 = COPY killed renamable $vgpr20 + ; GCN-NEXT: renamable $vgpr12 = COPY killed renamable $vgpr19 + ; GCN-NEXT: renamable $vgpr13 = COPY killed renamable $vgpr18 + ; GCN-NEXT: renamable $vgpr14 = COPY killed renamable $vgpr17 + ; GCN-NEXT: renamable $vgpr15 = COPY killed renamable $vgpr16 + ; GCN-NEXT: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) + ; GCN-NEXT: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5) + ; GCN-NEXT: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec + ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec + ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit 
$exec + ; GCN-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 + ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) + ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: S_ENDPGM 0 entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, 1 Index: llvm/test/CodeGen/AMDGPU/kernel-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -1135,29 +1135,31 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { ; SI-LABEL: v3i32_arg: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dword s2, s[0:1], 0xf +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v0, s6 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 +; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: v_mov_b32_e32 v1, s1 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: v3i32_arg: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34 +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34 +; VI-NEXT: s_load_dword s0, s[0:1], 0x3c ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v3, s4 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v4, s5 +; VI-NEXT: v_mov_b32_e32 v4, s3 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_mov_b32_e32 v3, s2 ; VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-NEXT: s_endpgm ; @@ -1214,29 +1216,31 @@ define amdgpu_kernel void 
@v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
 ; SI-LABEL: v3f32_arg:
 ; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_load_dword s2, s[0:1], 0xf
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, s6
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
 ; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mov_b32_e32 v1, s5
-; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-NEXT: s_endpgm
 ;
 ; VI-LABEL: v3f32_arg:
 ; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
+; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
+; VI-NEXT: s_load_dword s0, s[0:1], 0x3c
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_mov_b32_e32 v3, s4
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v4, s5
+; VI-NEXT: v_mov_b32_e32 v4, s3
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: v_mov_b32_e32 v3, s2
 ; VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-NEXT: s_endpgm
 ;
Index: llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
+++ llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
@@ -3,7 +3,7 @@
 ; Type legalization for illegal FP type results was dropping invariant
 ; and dereferenceable flags.
-; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load (s16) from %ir.ptr, addrspace 4)
+; GCN: BUFFER_LOAD_USHORT{{.*}} :: (invariant load (s16) from %ir.ptr, addrspace 4)
 define half @legalize_f16_load(half addrspace(4)* dereferenceable(4) %ptr) {
   %load = load half, half addrspace(4)* %ptr, !invariant.load !0
   %add = fadd half %load, 1.0
Index: llvm/test/CodeGen/AMDGPU/store-local.96.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/store-local.96.ll
+++ llvm/test/CodeGen/AMDGPU/store-local.96.ll
@@ -7,53 +7,57 @@
 define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, s2
-; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-NEXT: v_mov_b32_e32 v3, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b32_e32 v2, s5
 ; GFX9-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-NEXT: v_mov_b32_e32 v2, s2
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: v_mov_b32_e32 v2, s0
 ; GFX7-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s2
 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: ds_write_b32 v2, v1 offset:8
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: ds_write_b32 v2, v0 offset:8
+; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
 ; GFX6-NEXT: ds_write_b64 v2, v[0:1]
 ; GFX6-NEXT: s_endpgm
 ;
 ; GFX10-LABEL: store_lds_v3i32:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX10-NEXT: s_load_dword s4, s[0:1], 0x3c
+; GFX10-NEXT: s_load_dword s5, s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s4
-; GFX10-NEXT: v_mov_b32_e32 v1, s5
-; GFX10-NEXT: v_mov_b32_e32 v2, s6
-; GFX10-NEXT: v_mov_b32_e32 v3, s2
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-NEXT: v_mov_b32_e32 v3, s5
 ; GFX10-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX10-NEXT: s_endpgm
   store <3 x i32> %x, <3 x i32> addrspace(3)* %out
@@ -63,157 +67,161 @@
 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align1:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword 
s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-NEXT: ds_write_b8 v0, v1 offset:8 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:10 -; GFX9-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 -; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_lshr_b32 s0, s6, 8 -; GFX9-NEXT: ds_write_b8 v0, v1 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_lshr_b32 s0, s5, 8 +; GFX9-NEXT: s_lshr_b32 s1, s5, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_lshr_b32 s0, s6, 24 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:9 -; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_lshr_b32 s0, s5, 8 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:11 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_write_b8 v0, v1 offset:4 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:6 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_lshr_b32 s0, s3, 8 +; GFX9-NEXT: ds_write_b8 v0, v1 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_lshr_b32 s0, s5, 24 +; GFX9-NEXT: s_lshr_b32 s0, s3, 24 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:5 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_lshr_b32 s0, s4, 8 +; GFX9-NEXT: s_lshr_b32 s0, s2, 8 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:7 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: s_lshr_b32 s0, s4, 24 +; GFX9-NEXT: s_lshr_b32 s0, s2, 24 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:1 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: ds_write_b8 v0, v1 offset:3 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: ds_write_b8 v0, v1 offset:8 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:10 ; GFX9-NEXT: s_endpgm ; ; GFX7-LABEL: store_lds_v3i32_align1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: v_mov_b32_e32 v2, s1 -; GFX7-NEXT: ds_write_b8 v0, v1 offset:8 -; GFX7-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX7-NEXT: v_mov_b32_e32 v1, s0 -; GFX7-NEXT: s_lshr_b32 s3, s2, 8 -; GFX7-NEXT: ds_write_b8 v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s3 -; GFX7-NEXT: s_lshr_b32 s3, s2, 24 +; GFX7-NEXT: s_lshr_b32 s1, s0, 8 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: s_lshr_b32 s1, s0, 24 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:9 -; GFX7-NEXT: v_mov_b32_e32 v1, s3 -; GFX7-NEXT: s_lshr_b32 s2, s2, 16 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: s_lshr_b32 s1, s0, 16 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:11 -; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: s_lshr_b32 s2, s1, 8 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:10 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: ds_write_b8 v0, v1 offset:4 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: s_lshr_b32 s2, s1, 24 +; GFX7-NEXT: s_lshr_b32 s1, s3, 8 +; GFX7-NEXT: ds_write_b8 v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: s_lshr_b32 s1, s3, 24 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:5 -; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: s_lshr_b32 s1, s1, 16 +; GFX7-NEXT: v_mov_b32_e32 
v1, s1 +; GFX7-NEXT: s_lshr_b32 s1, s3, 16 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:7 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_lshr_b32 s1, s0, 8 +; GFX7-NEXT: s_lshr_b32 s1, s2, 8 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:6 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_lshr_b32 s1, s0, 24 +; GFX7-NEXT: s_lshr_b32 s1, s2, 24 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:1 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: s_lshr_b32 s0, s0, 16 +; GFX7-NEXT: s_lshr_b32 s1, s2, 16 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:3 -; GFX7-NEXT: v_mov_b32_e32 v1, s0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: ds_write_b8 v0, v1 offset:2 +; GFX7-NEXT: v_mov_b32_e32 v1, s0 +; GFX7-NEXT: ds_write_b8 v0, v1 offset:8 ; GFX7-NEXT: s_endpgm ; ; GFX6-LABEL: store_lds_v3i32_align1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v2, s1 -; GFX6-NEXT: ds_write_b8 v0, v1 offset:8 -; GFX6-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_lshr_b32 s3, s2, 8 -; GFX6-NEXT: ds_write_b8 v0, v1 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_lshr_b32 s3, s2, 24 +; GFX6-NEXT: s_lshr_b32 s1, s0, 8 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: s_lshr_b32 s1, s0, 24 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:9 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: s_lshr_b32 s1, s0, 16 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:11 -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: s_lshr_b32 s2, s1, 8 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:10 +; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: ds_write_b8 v0, v1 offset:4 ; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: s_lshr_b32 s2, s1, 24 +; GFX6-NEXT: s_lshr_b32 s1, s3, 8 +; GFX6-NEXT: ds_write_b8 v0, v1 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: s_lshr_b32 s1, s3, 24 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:5 -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: s_lshr_b32 s1, s3, 16 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:7 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_lshr_b32 s1, s0, 8 +; GFX6-NEXT: s_lshr_b32 s1, s2, 8 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:6 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_lshr_b32 s1, s0, 24 +; GFX6-NEXT: s_lshr_b32 s1, s2, 24 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:1 ; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 +; GFX6-NEXT: s_lshr_b32 s1, s2, 16 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:3 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b8 v0, v1 offset:2 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: ds_write_b8 v0, v1 offset:8 ; GFX6-NEXT: s_endpgm ; ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: s_load_dword s4, s[0:1], 0x3c +; GFX10-NEXT: s_load_dword s5, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s6 -; GFX10-NEXT: v_mov_b32_e32 v2, s5 -; GFX10-NEXT: 
v_mov_b32_e32 v3, s4 -; GFX10-NEXT: s_lshr_b32 s0, s6, 8 -; GFX10-NEXT: s_lshr_b32 s1, s6, 24 -; GFX10-NEXT: s_lshr_b32 s2, s5, 8 -; GFX10-NEXT: s_lshr_b32 s3, s5, 24 -; GFX10-NEXT: s_lshr_b32 s5, s4, 8 -; GFX10-NEXT: s_lshr_b32 s4, s4, 24 +; GFX10-NEXT: s_lshr_b32 s0, s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v0, s5 +; GFX10-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-NEXT: v_mov_b32_e32 v2, s2 +; GFX10-NEXT: s_lshr_b32 s1, s4, 24 +; GFX10-NEXT: s_lshr_b32 s5, s3, 8 +; GFX10-NEXT: s_lshr_b32 s3, s3, 24 +; GFX10-NEXT: s_lshr_b32 s6, s2, 8 +; GFX10-NEXT: s_lshr_b32 s2, s2, 24 ; GFX10-NEXT: v_mov_b32_e32 v4, s0 +; GFX10-NEXT: v_mov_b32_e32 v3, s4 ; GFX10-NEXT: v_mov_b32_e32 v5, s1 -; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: v_mov_b32_e32 v6, s5 ; GFX10-NEXT: v_mov_b32_e32 v7, s3 -; GFX10-NEXT: v_mov_b32_e32 v8, s5 -; GFX10-NEXT: v_mov_b32_e32 v9, s4 -; GFX10-NEXT: ds_write_b8 v0, v1 offset:8 -; GFX10-NEXT: ds_write_b8_d16_hi v0, v1 offset:10 -; GFX10-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 -; GFX10-NEXT: ds_write_b8 v0, v3 -; GFX10-NEXT: ds_write_b8_d16_hi v0, v3 offset:2 +; GFX10-NEXT: v_mov_b32_e32 v8, s6 +; GFX10-NEXT: v_mov_b32_e32 v9, s2 +; GFX10-NEXT: ds_write_b8 v0, v1 offset:4 +; GFX10-NEXT: ds_write_b8_d16_hi v0, v1 offset:6 +; GFX10-NEXT: ds_write_b8 v0, v2 +; GFX10-NEXT: ds_write_b8_d16_hi v0, v2 offset:2 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:9 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:11 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:5 ; GFX10-NEXT: ds_write_b8 v0, v7 offset:7 ; GFX10-NEXT: ds_write_b8 v0, v8 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v9 offset:3 +; GFX10-NEXT: ds_write_b8 v0, v3 offset:8 +; GFX10-NEXT: ds_write_b8_d16_hi v0, v3 offset:10 ; GFX10-NEXT: s_endpgm store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1 ret void @@ -222,85 +230,89 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-NEXT: ds_write_b16 v0, v1 offset:8 -; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:10 -; GFX9-NEXT: ds_write_b16 v0, v2 offset:4 -; GFX9-NEXT: ds_write_b16_d16_hi v0, v2 offset:6 -; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 +; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:6 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: ds_write_b16 v0, v1 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:2 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: ds_write_b16 v0, v1 offset:8 +; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:10 ; GFX9-NEXT: s_endpgm ; ; GFX7-LABEL: store_lds_v3i32_align2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_lshr_b32 s1, s0, 16 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: ds_write_b16 v0, v1 offset:10 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: ds_write_b16 v0, v1 offset:4 ; GFX7-NEXT: 
@@ -222,85 +230,89 @@
 define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align2:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-NEXT: v_mov_b32_e32 v2, s5
-; GFX9-NEXT: ds_write_b16 v0, v1 offset:8
-; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:10
-; GFX9-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX9-NEXT: ds_write_b16_d16_hi v0, v2 offset:6
-; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:4
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:6
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
 ; GFX9-NEXT: ds_write_b16 v0, v1
 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:2
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:8
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:10
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align2:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_lshr_b32 s1, s0, 16
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:10
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:4
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: ds_write_b16 v0, v1 offset:8
-; GFX7-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX7-NEXT: v_mov_b32_e32 v1, s0
-; GFX7-NEXT: s_lshr_b32 s2, s2, 16
+; GFX7-NEXT: s_lshr_b32 s1, s3, 16
 ; GFX7-NEXT: ds_write_b16 v0, v1
-; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: s_lshr_b32 s1, s1, 16
-; GFX7-NEXT: ds_write_b16 v0, v1 offset:10
 ; GFX7-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-NEXT: s_lshr_b32 s0, s0, 16
+; GFX7-NEXT: s_lshr_b32 s1, s2, 16
 ; GFX7-NEXT: ds_write_b16 v0, v1 offset:6
-; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
 ; GFX7-NEXT: ds_write_b16 v0, v1 offset:2
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align2:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_lshr_b32 s1, s0, 16
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:10
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:4
 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: v_mov_b32_e32 v2, s1
-; GFX6-NEXT: ds_write_b16 v0, v1 offset:8
-; GFX6-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX6-NEXT: v_mov_b32_e32 v1, s0
-; GFX6-NEXT: s_lshr_b32 s2, s2, 16
+; GFX6-NEXT: s_lshr_b32 s1, s3, 16
 ; GFX6-NEXT: ds_write_b16 v0, v1
-; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: s_lshr_b32 s1, s1, 16
-; GFX6-NEXT: ds_write_b16 v0, v1 offset:10
 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: s_lshr_b32 s0, s0, 16
+; GFX6-NEXT: s_lshr_b32 s1, s2, 16
 ; GFX6-NEXT: ds_write_b16 v0, v1 offset:6
-; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: v_mov_b32_e32 v1, s1
 ; GFX6-NEXT: ds_write_b16 v0, v1 offset:2
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
+; GFX6-NEXT: ds_write_b16 v0, v1 offset:8
 ; GFX6-NEXT: s_endpgm
 ;
 ; GFX10-LABEL: store_lds_v3i32_align2:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX10-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s6
-; GFX10-NEXT: v_mov_b32_e32 v2, s5
-; GFX10-NEXT: v_mov_b32_e32 v3, s4
-; GFX10-NEXT: ds_write_b16 v0, v1 offset:8
-; GFX10-NEXT: ds_write_b16_d16_hi v0, v1 offset:10
-; GFX10-NEXT: ds_write_b16 v0, v2 offset:4
-; GFX10-NEXT: ds_write_b16_d16_hi v0, v2 offset:6
-; GFX10-NEXT: ds_write_b16 v0, v3
-; GFX10-NEXT: ds_write_b16_d16_hi v0, v3 offset:2
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: v_mov_b32_e32 v3, s5
+; GFX10-NEXT: ds_write_b16 v0, v1 offset:4
+; GFX10-NEXT: ds_write_b16_d16_hi v0, v1 offset:6
+; GFX10-NEXT: ds_write_b16 v0, v2
+; GFX10-NEXT: ds_write_b16_d16_hi v0, v2 offset:2
+; GFX10-NEXT: ds_write_b16 v0, v3 offset:8
+; GFX10-NEXT: ds_write_b16_d16_hi v0, v3 offset:10
 ; GFX10-NEXT: s_endpgm
 store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
 ret void
@@ -309,55 +321,59 @@
 define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align4:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: v_mov_b32_e32 v1, s4
-; GFX9-NEXT: v_mov_b32_e32 v2, s5
-; GFX9-NEXT: v_mov_b32_e32 v3, s6
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: v_mov_b32_e32 v2, s3
 ; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
-; GFX9-NEXT: ds_write_b32 v0, v3 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: ds_write_b32 v0, v1 offset:8
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align4:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s0
-; GFX7-NEXT: v_mov_b32_e32 v2, s1
-; GFX7-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
 ; GFX7-NEXT: v_mov_b32_e32 v1, s2
+; GFX7-NEXT: v_mov_b32_e32 v2, s3
+; GFX7-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
 ; GFX7-NEXT: ds_write_b32 v0, v1 offset:8
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align4:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v0, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
+; GFX6-NEXT: v_mov_b32_e32 v2, s2
 ; GFX6-NEXT: ds_write2_b32 v0, v2, v1 offset1:1
-; GFX6-NEXT: v_mov_b32_e32 v1, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s0
 ; GFX6-NEXT: ds_write_b32 v0, v1 offset:8
 ; GFX6-NEXT: s_endpgm
 ;
 ; GFX10-LABEL: store_lds_v3i32_align4:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX10-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-NEXT: v_mov_b32_e32 v2, s5
-; GFX10-NEXT: v_mov_b32_e32 v3, s6
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s2
+; GFX10-NEXT: v_mov_b32_e32 v2, s3
+; GFX10-NEXT: v_mov_b32_e32 v3, s5
 ; GFX10-NEXT: ds_write2_b32 v0, v1, v2 offset1:1
 ; GFX10-NEXT: ds_write_b32 v0, v3 offset:8
 ; GFX10-NEXT: s_endpgm
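All five variants of this test lower the same IR; only the store alignment changes, which is why the regression shrinks as alignment grows (byte writes at align 1, halfword writes at align 2, whole dwords from align 4 up). For reference, this is the shape of the kernel under test, reduced and under a hypothetical name:

define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
  ; align 4 lowers to ds_write2_b32 + ds_write_b32, align 8 to
  ; ds_write_b64 + ds_write_b32, align 16 to a single ds_write_b96.
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
  ret void
}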
@@ -368,57 +384,61 @@
 define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align8:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, s2
-; GFX9-NEXT: v_mov_b32_e32 v3, s6
-; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: ds_write_b32 v2, v3 offset:8
+; GFX9-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
 ; GFX9-NEXT: ds_write_b64 v2, v[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: ds_write_b32 v2, v0 offset:8
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align8:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v2, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s2
-; GFX7-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-NEXT: ds_write_b32 v2, v1 offset:8
-; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
 ; GFX7-NEXT: ds_write_b64 v2, v[0:1]
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: ds_write_b32 v2, v0 offset:8
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align8:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s2
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: ds_write_b32 v2, v1 offset:8
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
 ; GFX6-NEXT: ds_write_b64 v2, v[0:1]
+; GFX6-NEXT: v_mov_b32_e32 v0, s0
+; GFX6-NEXT: ds_write_b32 v2, v0 offset:8
 ; GFX6-NEXT: s_endpgm
 ;
 ; GFX10-LABEL: store_lds_v3i32_align8:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX10-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX10-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-NEXT: v_mov_b32_e32 v3, s6
-; GFX10-NEXT: v_mov_b32_e32 v0, s4
-; GFX10-NEXT: v_mov_b32_e32 v1, s5
-; GFX10-NEXT: ds_write_b32 v2, v3 offset:8
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-NEXT: v_mov_b32_e32 v3, s5
 ; GFX10-NEXT: ds_write_b64 v2, v[0:1]
+; GFX10-NEXT: ds_write_b32 v2, v3 offset:8
 ; GFX10-NEXT: s_endpgm
 store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
 ret void
@@ -427,53 +447,57 @@
 define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align16:
 ; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX9-NEXT: s_load_dword s5, s[0:1], 0x3c
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, s2
-; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: v_mov_b32_e32 v1, s5
-; GFX9-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-NEXT: v_mov_b32_e32 v3, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: v_mov_b32_e32 v2, s5
 ; GFX9-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX9-NEXT: s_endpgm
 ;
 ; GFX7-LABEL: store_lds_v3i32_align16:
 ; GFX7: ; %bb.0:
 ; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX7-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX7-NEXT: s_mov_b32 m0, -1
 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT: v_mov_b32_e32 v3, s4
-; GFX7-NEXT: v_mov_b32_e32 v0, s0
-; GFX7-NEXT: v_mov_b32_e32 v1, s1
-; GFX7-NEXT: v_mov_b32_e32 v2, s2
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
+; GFX7-NEXT: v_mov_b32_e32 v2, s0
 ; GFX7-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX7-NEXT: s_endpgm
 ;
 ; GFX6-LABEL: store_lds_v3i32_align16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT: s_load_dword s0, s[0:1], 0xf
 ; GFX6-NEXT: s_mov_b32 m0, -1
 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
-; GFX6-NEXT: v_mov_b32_e32 v1, s2
 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: ds_write_b32 v2, v1 offset:8
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
+; GFX6-NEXT: ds_write_b32 v2, v0 offset:8
+; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: v_mov_b32_e32 v1, s3
 ; GFX6-NEXT: ds_write_b64 v2, v[0:1]
 ; GFX6-NEXT: s_endpgm
 ;
 ; GFX10-LABEL: store_lds_v3i32_align16:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
+; GFX10-NEXT: s_load_dword s4, s[0:1], 0x3c
+; GFX10-NEXT: s_load_dword s5, s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s4
-; GFX10-NEXT: v_mov_b32_e32 v1, s5
-; GFX10-NEXT: v_mov_b32_e32 v2, s6
-; GFX10-NEXT: v_mov_b32_e32 v3, s2
+; GFX10-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s3
+; GFX10-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-NEXT: v_mov_b32_e32 v3, s5
 ; GFX10-NEXT: ds_write_b96 v3, v[0:2]
 ; GFX10-NEXT: s_endpgm
 store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16
Index: llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -7,12 +7,20 @@
 define zeroext i1 @opeq1(
 ; PPC64LE-LABEL: opeq1:
-; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: ld 4, 0(4)
-; PPC64LE-NEXT: xor 3, 3, 4
-; PPC64LE-NEXT: cntlzd 3, 3
-; PPC64LE-NEXT: rldicl 3, 3, 58, 63
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: lwz 5, 0(3)
+; PPC64LE-NEXT: lwz 6, 0(4)
+; PPC64LE-NEXT: crxor 2, 2, 2
+; PPC64LE-NEXT: cmplw 1, 5, 6
+; PPC64LE-NEXT: bne 1, .LBB0_2
+; PPC64LE-NEXT: # %bb.1: # %land.rhs.i
+; PPC64LE-NEXT: lwz 3, 4(3)
+; PPC64LE-NEXT: lwz 4, 4(4)
+; PPC64LE-NEXT: cmpw 3, 4
+; PPC64LE-NEXT: .LBB0_2: # %opeq1.exit
+; PPC64LE-NEXT: li 3, 0
+; PPC64LE-NEXT: li 4, 1
+; PPC64LE-NEXT: iseleq 3, 4, 3
 ; PPC64LE-NEXT: blr
 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
Index: llvm/test/CodeGen/WebAssembly/reg-stackify.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/reg-stackify.ll
+++ llvm/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -559,7 +559,7 @@
 ; NOREGS: call callee
 ; NOREGS: i32.load 0
 ; NOREGS: return
-define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
+define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* byval(i32) %p2) {
 store i32 %a, i32* %p1
 %b = load i32, i32* %p2, !invariant.load !0
 call i32 @callee(i32 %a)
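The two targets above take different approaches to the semantic change. The PowerPC test accepts the regression: the expanded memcmp is no longer merged back into a single 8-byte compare, since the combined load needs dereferenceability to hold at the merged block rather than at the call site. The WebAssembly test instead switches the argument from dereferenceable(4) to byval(i32) to keep testing what it was written for (stackifying an invariant load past a store); a byval argument is a caller-owned copy that cannot be freed during the call, so its dereferenceability survives at every point. A reduced sketch of the compare chain that is no longer merged, with hypothetical names:

define zeroext i1 @pair_eq({ i32, i32 }* dereferenceable(8) %a, { i32, i32 }* dereferenceable(8) %b) {
entry:
  ; Compare the first fields; previously the whole chain could be
  ; rewritten into one wide compare on the strength of dereferenceable(8).
  %pa0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i64 0, i32 0
  %pb0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %b, i64 0, i32 0
  %a0 = load i32, i32* %pa0, align 4
  %b0 = load i32, i32* %pb0, align 4
  %eq0 = icmp eq i32 %a0, %b0
  br i1 %eq0, label %rhs, label %exit
rhs:
  ; The second fields are only compared when the first fields match.
  %pa1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i64 0, i32 1
  %pb1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %b, i64 0, i32 1
  %a1 = load i32, i32* %pa1, align 4
  %b1 = load i32, i32* %pb1, align 4
  %eq1 = icmp eq i32 %a1, %b1
  br label %exit
exit:
  %res = phi i1 [ false, %entry ], [ %eq1, %rhs ]
  ret i1 %res
}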
Index: llvm/test/CodeGen/X86/load-partial.ll
===================================================================
--- llvm/test/CodeGen/X86/load-partial.ll
+++ llvm/test/CodeGen/X86/load-partial.ll
@@ -157,14 +157,30 @@
 }
 
 define <4 x float> @load_float4_float3_trunc(<4 x float>* nocapture readonly dereferenceable(16)) {
-; SSE-LABEL: load_float4_float3_trunc:
-; SSE: # %bb.0:
-; SSE-NEXT: movaps (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: load_float4_float3_trunc:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_float4_float3_trunc:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_float4_float3_trunc:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: load_float4_float3_trunc:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; AVX-NEXT: retq
 %2 = bitcast <4 x float>* %0 to i64*
 %3 = load i64, i64* %2, align 16
Index: llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
+++ llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
@@ -9,9 +9,12 @@
 ; Simple memcpy to alloca from constant address space argument.
 define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
 ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
+; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* noundef align 4 dereferenceable(32) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 4 dereferenceable(32) [[ARG_CAST]], i64 32, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
 ; CHECK-NEXT: ret i8 [[LOAD]]
 ;
 %alloca = alloca [32 x i8], align 4, addrspace(5)
@@ -25,9 +28,12 @@
 define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
 ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_metadata(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1, !noalias !0
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
+; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* noundef align 4 dereferenceable(32) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 4 dereferenceable(32) [[ARG_CAST]], i64 32, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1, !noalias !0
 ; CHECK-NEXT: ret i8 [[LOAD]]
 ;
 %alloca = alloca [32 x i8], align 4, addrspace(5)
@@ -41,9 +47,12 @@
 define i64 @memcpy_constant_arg_ptr_to_alloca_load_alignment([32 x i64] addrspace(4)* noalias readonly align 4 dereferenceable(256) %arg, i32 %idx) {
 ; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_alignment(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i64], [32 x i64] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(4)* [[GEP]], align 16
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i64], align 4, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = bitcast [32 x i64] addrspace(5)* [[ALLOCA]] to i8 addrspace(5)*
+; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast [32 x i64] addrspace(4)* [[ARG:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* noundef align 4 dereferenceable(256) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 4 dereferenceable(256) [[ARG_CAST]], i64 256, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], [32 x i64] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(5)* [[GEP]], align 16
 ; CHECK-NEXT: ret i64 [[LOAD]]
 ;
 %alloca = alloca [32 x i64], align 4, addrspace(5)
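The three memcpy cases above regress in the same way, and the memmove case below is identical: InstCombine previously deleted the alloca and the copy, loading straight from the constant address space argument. That rewrite moves the indexed read to a later program point than the memcpy whose dereferenceable(32) (or (256)) operand justified it, which at-point semantics forbids. Since AMDGPU addrspace(4) constant memory can never be freed, this fold is presumably recoverable with extra target knowledge. A reduced sketch of the output InstCombine used to produce, under a hypothetical name:

define i8 @folded_form([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
  ; The alloca and memcpy are gone; the load reads %arg directly at an
  ; arbitrary index, so all 32 bytes must still be dereferenceable here,
  ; not merely at the point of the original memcpy.
  %idx.ext = sext i32 %idx to i64
  %gep = getelementptr [32 x i8], [32 x i8] addrspace(4)* %arg, i64 0, i64 %idx.ext
  %load = load i8, i8 addrspace(4)* %gep, align 1
  ret i8 %load
}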
@@ -77,9 +86,12 @@
 ; Simple memmove to alloca from constant address space argument.
 define i8 @memmove_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
 ; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
+; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memmove.p5i8.p4i8.i32(i8 addrspace(5)* noundef align 4 dereferenceable(32) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 4 dereferenceable(32) [[ARG_CAST]], i32 32, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
 ; CHECK-NEXT: ret i8 [[LOAD]]
 ;
 %alloca = alloca [32 x i8], align 4, addrspace(5)
Index: llvm/test/Transforms/LICM/hoist-deref-load.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -424,18 +424,18 @@
 ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK: for.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT: br label [[FOR_INC]]
 ; CHECK: for.inc:
@@ -499,18 +499,18 @@
 ; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END]]
 ; CHECK: for.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT: br label [[FOR_INC]]
 ; CHECK: for.inc:
Index: llvm/test/Transforms/MergeICmps/X86/addressspaces.ll
===================================================================
--- llvm/test/Transforms/MergeICmps/X86/addressspaces.ll
+++ llvm/test/Transforms/MergeICmps/X86/addressspaces.ll
@@ -10,16 +10,19 @@
 ; CHECK-NEXT: [[PTR_A1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A]], i64 0, i64 1
 ; CHECK-NEXT: [[PTR_B0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[B:%.*]], i64 0, i64 0
 ; CHECK-NEXT: [[PTR_B1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[B]], i64 0, i64 1
-; CHECK-NEXT: br label %"bb1+bb2"
-; CHECK: "bb1+bb2":
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A]], i64 0, i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[B]], i64 0, i64 0
-; CHECK-NEXT: [[CSTR:%.*]] = bitcast i64* [[TMP0]] to i8*
-; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i64* [[TMP1]] to i8*
-; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
-; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[A0:%.*]] = load i64, i64* [[PTR_A0]], align 4
+; CHECK-NEXT: [[B0:%.*]] = load i64, i64* [[PTR_B0]], align 4
+; CHECK-NEXT: [[COND0:%.*]] = icmp eq i64 [[A0]], [[B0]]
+; CHECK-NEXT: br i1 [[COND0]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[A1:%.*]] = load i64, i64* [[PTR_A1]], align 4
+; CHECK-NEXT: [[B1:%.*]] = load i64, i64* [[PTR_B1]], align 4
+; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[A1]], [[B1]]
+; CHECK-NEXT: br label [[BB3]]
 ; CHECK: bb3:
+; CHECK-NEXT: [[NECESSARY:%.*]] = phi i1 [ [[COND1]], [[BB2]] ], [ false, [[BB1]] ]
 ; CHECK-NEXT: ret void
 ;
 bb0:
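The last two files show the same at-point effect on speculation. In the LICM tests the load of %c executes only under a branch inside the loop; hoisting it to the preheader runs it unconditionally, which was justified by the argument's dereferenceability and is not justified once that fact is pinned to a program point, so the load now stays guarded under if.then. The MergeICmps address space test mirrors the PowerPC change: the two i64 comparisons stay as separate guarded loads instead of one 16-byte memcmp. A reduced sketch of the LICM pattern, with hypothetical names:

define void @no_hoist(i32* %a, i32* dereferenceable(4) %c, i32 %n) {
entry:
  %positive = icmp sgt i32 %n, 0
  br i1 %positive, label %loop, label %exit
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
  %a.gep = getelementptr inbounds i32, i32* %a, i32 %i
  %v = load i32, i32* %a.gep, align 4
  %gt = icmp sgt i32 %v, 0
  br i1 %gt, label %then, label %latch
then:
  ; Hoisting this load into the preheader would execute it on inputs
  ; where the guard never fires; that speculation leaned on
  ; dereferenceable(4), which now only establishes the fact at the call.
  %cv = load i32, i32* %c, align 4
  store i32 %cv, i32* %a.gep, align 4
  br label %latch
latch:
  %i.next = add nuw nsw i32 %i, 1
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}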