diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -779,13 +779,13 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { defm GLOBAL_ATOMIC_FCMPSWAP : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>; defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>; defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>; defm GLOBAL_ATOMIC_FCMPSWAP_X2 : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>; defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>; defm GLOBAL_ATOMIC_FMAX_X2 : diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir --- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -152,7 +152,7 @@ name: global_atomic_fcmpswap_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -164,7 +164,7 @@ name: global_atomic_fcmpswap_x2_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -224,7 +224,7 @@ name: global_atomic_fcmpswap_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -236,7 +236,7 @@ name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -296,7 +296,7 @@ name: global_atomic_fcmpswap_saddr_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -308,7 +308,7 @@ name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_128, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_flat.s @@ -225,6 +225,46 @@ flat_atomic_xor_x2 v[1:2], v[2:3] dlc // GFX10: [0x00,0x10,0x6c,0xdd,0x01,0x02,0x7d,0x00] +//===----------------------------------------------------------------------===// +// ENC_FLAT_GLOBAL: fcmpswap. +//===----------------------------------------------------------------------===// + +global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 +// GFX10: [0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap v5, v[1:2], v[2:3], off offset:-1 glc +// GFX10: [0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05] + +global_atomic_fcmpswap v[1:2], v[2:3], off offset:2047 +// GFX10: [0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap v[1:2], v[2:3], off offset:-2048 +// GFX10: [0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 slc +// GFX10: [0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 dlc +// GFX10: [0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 +// GFX10: [0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap_x2 v[5:6], v[1:2], v[2:5], off offset:-1 glc +// GFX10: [0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05] + +global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:2047 +// GFX10: [0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-2048 +// GFX10: [0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 slc +// GFX10: [0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00] + +global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 dlc +// GFX10: [0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00] + //===----------------------------------------------------------------------===// // ENC_FLAT_GLOBAL: dlc support for atomics. //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt --- a/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/flat_gfx10.txt @@ -267,3 +267,43 @@ # CHECK: global_atomic_xor_x2 v[1:2], v[2:3], off dlc ; encoding: [0x00,0x90,0x6c,0xdd,0x01,0x02,0x7d,0x00] 0x00,0x90,0x6c,0xdd,0x01,0x02,0x7d,0x00 + +#===------------------------------------------------------------------------===# +# FLAT GLOBAL opcodes: fcmpswap +#===------------------------------------------------------------------------===# + +# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00] +0xff,0x8f,0xf8,0xdc,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap v5, v[1:2], v[2:3], off offset:-1 glc ; encoding: [0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05] +0xff,0x8f,0xf9,0xdc,0x01,0x02,0x7d,0x05 + +# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:2047 ; encoding: [0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00] +0xff,0x87,0xf8,0xdc,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-2048 ; encoding: [0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00] +0x00,0x88,0xf8,0xdc,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 slc ; encoding: [0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00] +0xff,0x8f,0xfa,0xdc,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap v[1:2], v[2:3], off offset:-1 dlc ; encoding: [0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00] +0xff,0x9f,0xf8,0xdc,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00] +0xff,0x8f,0x78,0xdd,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap_x2 v[5:6], v[1:2], v[2:5], off offset:-1 glc ; encoding: [0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05] +0xff,0x8f,0x79,0xdd,0x01,0x02,0x7d,0x05 + +# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:2047 ; encoding: [0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00] +0xff,0x87,0x78,0xdd,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-2048 ; encoding: [0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00] +0x00,0x88,0x78,0xdd,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 slc ; encoding: [0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00] +0xff,0x8f,0x7a,0xdd,0x01,0x02,0x7d,0x00 + +# CHECK: global_atomic_fcmpswap_x2 v[1:2], v[2:5], off offset:-1 dlc ; encoding: [0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00] +0xff,0x9f,0x78,0xdd,0x01,0x02,0x7d,0x00