Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -176,6 +176,12 @@ let CheckMMOIsAtomic = 1; } +// Operands are swapped for atomic_store vs. regular store +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 0; + let CheckMMOIsAtomic = 1; +} + def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -138,6 +138,9 @@ bit CheckMMOIsAtomic = 1; } +def : GINodeEquiv { + bit CheckMMOIsAtomic = 1; +} def : GINodeEquiv; Index: llvm/lib/Target/AMDGPU/DSInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/DSInstructions.td +++ llvm/lib/Target/AMDGPU/DSInstructions.td @@ -719,7 +719,7 @@ // normal store. class DSAtomicWritePat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), - (inst $ptr, $value, offset:$offset, (i1 0)) + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 0)) >; multiclass DSAtomicWritePat_mc { Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -35,9 +35,6 @@ ; The key problem here is that we may fail to create an MBB referenced by a ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things ; happen. 
-; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %6:gpr(s32), %2:gpr(p0) :: (store seq_cst 4 into %ir.addr) (in function: pending_phis) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis -; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis: define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) { br i1 %tst, label %true, label %false @@ -72,16 +69,6 @@ ret i128 undef } -; Just to make sure we don't accidentally emit a normal load/store. -; FALLBACK-WITH-REPORT-ERR: cannot select: G_STORE %1:gpr(s64), %0:gpr64sp(p0) :: (store unordered 8 into %ir.addr) (in function: atomic_ops) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops -; FALLBACK-WITH-REPORT-LABEL: atomic_ops: -define i64 @atomic_ops(i64* %addr) { - store atomic i64 0, i64* %addr unordered, align 8 - %res = load atomic i64, i64* %addr seq_cst, align 8 - ret i64 %res -} - ; Make sure we don't mess up metadata arguments. declare void @llvm.write_register.i64(metadata, i64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -0,0 +1,272 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: atomic_store_flat_s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; GFX7-LABEL: name: atomic_store_flat_s32_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: 
[[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4) + ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p0) = COPY $vgpr1_vgpr2 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + +... + +--- + +name: atomic_store_flat_v2s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; GFX7-LABEL: name: atomic_store_flat_v2s16_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(p0) = COPY $vgpr1_vgpr2 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + +... 
+ +--- + +name: atomic_store_flat_p3_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; GFX7-LABEL: name: atomic_store_flat_p3_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p0) = COPY $vgpr1_vgpr2 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + +... + +--- + +name: atomic_store_flat_p5_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; GFX7-LABEL: name: atomic_store_flat_p5_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p0) = COPY $vgpr1_vgpr2 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + +... 
+ +--- + +name: atomic_store_flat_p6_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1_vgpr2 + + ; GFX7-LABEL: name: atomic_store_flat_p6_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 + ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4) + %0:vgpr(p6) = COPY $vgpr0 + %1:vgpr(p0) = COPY $vgpr1_vgpr2 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + +... + +--- + +name: atomic_store_flat_s64_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: atomic_store_flat_s64_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8) + ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = COPY $vgpr2_vgpr3 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + +... 
+ +--- + +name: atomic_store_flat_v2s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8) + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = COPY $vgpr2_vgpr3 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + +... + +--- + +name: atomic_store_flat_v4s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: atomic_store_flat_v4s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8) + %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = COPY $vgpr2_vgpr3 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + +... 
+ +--- + +name: atomic_store_flat_p0_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: atomic_store_flat_p0_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = COPY $vgpr2_vgpr3 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + +... +--- + +name: atomic_store_flat_p1_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: atomic_store_flat_p1_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p0) = COPY $vgpr2_vgpr3 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -0,0 +1,343 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: atomic_store_local_s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomic_store_local_s32_seq_cst + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 4, addrspace 3) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p3) = COPY $vgpr1 + G_STORE %0, %1 :: (store seq_cst 4, align 4, 
addrspace 3) + +... + +--- + +name: atomic_store_local_v2s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomic_store_local_v2s16_seq_cst + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_v2s16_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_v2s16_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(p3) = COPY $vgpr1 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_p3_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomic_store_local_p3_seq_cst + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_p3_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_p3_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = COPY $vgpr1 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_p5_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomic_store_local_p5_seq_cst + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_p5_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_p5_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p3) = COPY $vgpr1 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_p6_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: atomic_store_local_p6_seq_cst + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_p6_seq_cst + ; GFX7: liveins: $vgpr0, $vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_p6_seq_cst + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + %0:vgpr(p6) = COPY $vgpr0 + %1:vgpr(p3) = COPY $vgpr1 + G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_s64_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: atomic_store_local_s64_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_s64_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 8, addrspace 3) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_v2s32_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: atomic_store_local_v2s32_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_v2s32_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_v2s32_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_v4s16_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: atomic_store_local_v4s16_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_v4s16_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_v4s16_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + +... 
+ +--- + +name: atomic_store_local_p0_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: atomic_store_local_p0_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_p0_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_p0_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + %0:vgpr(p0) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + +... 
+--- + +name: atomic_store_local_p1_seq_cst +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX6-LABEL: name: atomic_store_local_p1_seq_cst + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7-LABEL: name: atomic_store_local_p1_seq_cst + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9-LABEL: name: atomic_store_local_p1_seq_cst + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -688,24 +688,25 @@ ; GFX7-LABEL: name: store_atomic_flat_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4) + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX8: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4) + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX10: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4) 
+ ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0) @@ -725,24 +726,25 @@ ; GFX7-LABEL: name: store_atomic_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8) + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX8: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8) + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; 
GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX10: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8) + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -786,29 +786,30 @@ ; GFX6: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) ; GFX7-LABEL: name: store_atomic_global_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX7-FLAT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 
4, addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX8: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX10: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1) @@ -833,29 +834,30 @@ ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) ; GFX7-LABEL: name: store_atomic_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE 
[[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX8: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX10: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1) Index: llvm/test/TableGen/Common/GlobalISelEmitterCommon.td =================================================================== --- llvm/test/TableGen/Common/GlobalISelEmitterCommon.td +++ llvm/test/TableGen/Common/GlobalISelEmitterCommon.td @@ -14,6 +14,7 @@ def FPR32Op : RegisterOperand; def B0 : Register<"b0"> { let Namespace = "MyTarget"; } def GPR8 : RegisterClass<"MyTarget", [i8], 8, (add B0)>; +def GPR8Op : RegisterOperand; def p0 : PtrValueType ; Index: llvm/test/TableGen/GlobalISelEmitter-atomic_store.td =================================================================== --- /dev/null +++ llvm/test/TableGen/GlobalISelEmitter-atomic_store.td @@ -0,0 +1,24 @@ +// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common %s -o - < %s | FileCheck -check-prefix=GISEL %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +def ST_ATOM_B32 : I<(outs), (ins GPR32Op:$val, GPR32Op:$ptr), []>; + +// Check that the pattern for atomic_store inverts the operands to +// match the order of G_STORE. 
+ +// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE, +// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/1, +// GISEL-NEXT: GIM_CheckAtomicOrderingOrStrongerThan, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::Unordered, +// GISEL-NEXT: // MIs[0] ptr +// GISEL-NEXT: GIM_CheckPointerToAny, /*MI*/0, /*Op*/1, /*SizeInBits*/0, +// GISEL-NEXT: // MIs[0] val +// GISEL-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// GISEL-NEXT: // (atomic_store iPTR:{ *:[iPTR] }:$ptr, i32:{ *:[i32] }:$val)<> => (ST_ATOM_B32 GPR32Op:{ *:[i32] }:$val, GPR32Op:{ *:[i32] }:$ptr) +// GISEL-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/MyTarget::ST_ATOM_B32, +def : Pat< +// (atomic_store_8 iPTR:$ptr, i32:$val), + (atomic_store_8 iPTR:$ptr, i32:$val), + (ST_ATOM_B32 GPR32Op:$val, GPR32Op:$ptr) +>; Index: llvm/utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- llvm/utils/TableGen/GlobalISelEmitter.cpp +++ llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -3766,9 +3766,12 @@ return failedImport("Src pattern child has predicate (" + explainPredicates(Src) + ")"); } + + bool IsAtomic = false; if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic")) InsnMatcher.addPredicate("NotAtomic"); else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) { + IsAtomic = true; InsnMatcher.addPredicate( "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger); } @@ -3822,6 +3825,27 @@ } } + // Hack around an unfortunate mistake in how atomic store (and really + // atomicrmw in general) operands were ordered. A ISD::STORE used the order + // <stored value>, <pointer> order. ISD::ATOMIC_STORE used the opposite, + // <pointer>, <stored value>. In GlobalISel there's just the one store + // opcode, so we need to swap the operands here to get the right type check. 
+ if (IsAtomic && SrcGIOrNull->TheDef->getName() == "G_STORE") { + assert(NumChildren == 2 && "wrong operands for atomic store"); + + TreePatternNode *PtrChild = Src->getChild(0); + TreePatternNode *ValueChild = Src->getChild(1); + + if (auto Error = importChildMatcher(Rule, InsnMatcher, PtrChild, true, + false, 1, TempOpIdx)) + return std::move(Error); + + if (auto Error = importChildMatcher(Rule, InsnMatcher, ValueChild, false, + false, 0, TempOpIdx)) + return std::move(Error); + return InsnMatcher; + } + // Match the used operands (i.e. the children of the operator). bool IsIntrinsic = SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||