Index: llvm/lib/Target/AMDGPU/MIMGInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -860,12 +860,22 @@
     let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
       // _V* variants have different dst size, but the size is encoded implicitly,
-      // using dmask and tfe. Only 32-bit variant is registered with disassembler.
+      // using dmask and tfe. Only the smallest variant is registered with disassembler.
       // Other variants are reconstructed by disassembler using dmask and tfe.
-      let VDataDwords = !if(isCmpSwap, 2, 1) in
-      defm _V1 : MIMG_Atomic_Addr_Helper_m ;
-      let VDataDwords = !if(isCmpSwap, 4, 2) in
-      defm _V2 : MIMG_Atomic_Addr_Helper_m ;
+      if !not(isCmpSwap) then {
+        let VDataDwords = 1 in
+        defm _V1 : MIMG_Atomic_Addr_Helper_m ;
+      }
+      let VDataDwords = 2 in
+      defm _V2 : MIMG_Atomic_Addr_Helper_m ;
+      let VDataDwords = 3 in
+      defm _V3 : MIMG_Atomic_Addr_Helper_m ;
+      if isCmpSwap then {
+        let VDataDwords = 4 in
+        defm _V4 : MIMG_Atomic_Addr_Helper_m ;
+        let VDataDwords = 5 in
+        defm _V5 : MIMG_Atomic_Addr_Helper_m ;
+      }
     }
   } // End IsAtomicRet = 1
 }
@@ -1113,6 +1123,8 @@
       Gather4 = 1 in {
     let VDataDwords = 2 in
     defm _V2 : MIMG_Sampler_Src_Helper; /* for packed D16 only */
+    let VDataDwords = 3 in
+    defm _V3 : MIMG_Sampler_Src_Helper; /* for packed D16 + tfe only */
     let VDataDwords = 4 in
     defm _V4 : MIMG_Sampler_Src_Helper;
     let VDataDwords = 5 in
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -19,8 +19,8 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
-    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0
+    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
     ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
     ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
     ; GFX8-LABEL: name: atomic_cmpswap_i32_1d
@@ -29,8 +29,8 @@
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
-    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0
+    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
     ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
     ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
     ; GFX10-LABEL: name: atomic_cmpswap_i32_1d
@@ -39,8 +39,8 @@
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
-    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0
+    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
     ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
     ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
     %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@@ -67,7 +67,7 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
     ; GFX6-NEXT: S_ENDPGM 0
     ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
     ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -75,7 +75,7 @@
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
     ; GFX8-NEXT: S_ENDPGM 0
     ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
     ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -83,7 +83,7 @@
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
     ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
+    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
     ; GFX10-NEXT: S_ENDPGM 0
     %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -108,8 +108,8 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
-    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1
+    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
     ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
     ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
     ; GFX8-LABEL: name: atomic_cmpswap_i64_1d
@@ -118,8 +118,8 @@
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
-    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1
+    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
     ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
     ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
     ; GFX10-LABEL: name: atomic_cmpswap_i64_1d
@@ -128,8 +128,8 @@
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
-    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1
+    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
     ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
     ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
     %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@@ -156,7 +156,7 @@
     ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
     ; GFX6-NEXT: S_ENDPGM 0
     ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
     ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -164,7 +164,7 @@
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
     ; GFX8-NEXT: S_ENDPGM 0
     ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
     ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -172,7 +172,7 @@
     ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
     ; GFX10-NEXT: S_ENDPGM 0
     %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
Index: llvm/test/CodeGen/AMDGPU/release-vgprs.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -403,9 +403,9 @@
 body: |
   bb.0:
     ; CHECK-LABEL: name: image_atomic
-    ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
     ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
     ; CHECK-NEXT: S_ENDPGM 0
-    renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
     S_ENDPGM 0
 ...
Index: llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
+++ llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
@@ -100,9 +100,18 @@
 image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc
 ; GFX10: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x10,0x21,0x3c,0xf0,0x20,0x04,0x18,0x00]

+image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00]
+
 image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc
 ; GFX10: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x12,0x23,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

+image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00]
+
+image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00]
+
 image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc
 ; GFX10: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x1a,0x23,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
@@ -400,6 +409,15 @@
 image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
 ; GFX10: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x03]

+image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+; GFX10: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03]
+
+image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16
+; GFX10: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83]
+
+image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
+; GFX10: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83]
+
 image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE
 ; GFX10: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x02,0x04,0xf1,0x20,0x40,0x21,0x03]
Index: llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
+++ llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
@@ -100,9 +100,18 @@
 image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc
 ; GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]

+image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX11: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00]
+
 image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc
 ; GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

+image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX11: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00]
+
+image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+; GFX11: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00]
+
 image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc
 ; GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
@@ -220,6 +229,15 @@
 image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
 ; GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]

+image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+; GFX11: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64]
+
+image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16
+; GFX11: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64]
+
+image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
+; GFX11: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64]
+
 image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE
 ; GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]
Index: llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
@@ -170,6 +170,9 @@
 # GFX10: image_atomic_swap v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00]
 0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00

+# GFX10: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00]
+0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00
+
 # GFX10: image_sample_c_b v[16:19], v[8:12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03]
 0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03
@@ -314,6 +317,12 @@
 # GFX10: image_atomic_cmpswap v[16:17], [v8, v9], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]
 0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c

+# GFX10: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00]
+0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00
+
+# GFX10: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00]
+0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00
+
 # GFX10: image_atomic_add v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; encoding: [0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
 0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
@@ -443,6 +452,15 @@
 # GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
 0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

+# GFX10: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03]
+0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03
+
+# GFX10: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83]
+0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83
+
+# GFX10: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83]
+0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83
+
 # GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
 0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt
@@ -99,9 +99,18 @@
 # GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]
 0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00

+# GFX11: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00]
+0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00
+
 # GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
 0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00

+# GFX11: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00]
+0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00
+
+# GFX11: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00]
+0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00
+
 # GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
 0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00
@@ -219,6 +228,15 @@
 # GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]
 0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64

+# GFX11: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64]
+0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64
+
+# GFX11: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64]
+0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64
+
+# GFX11: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64]
+0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64
+
 # GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]
 0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64
Index: llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
+++ llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
@@ -191,7 +191,7 @@
 # VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00

-# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00

 # VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
@@ -203,7 +203,7 @@
 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00

-# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00

#===------------------------------------------------------------------------===#
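
Note on the sizing rule the tests above exercise: the .td comment says the VData width is encoded implicitly via dmask and tfe, and the disassembler reconstructs the right _V1.._V5 variant from those fields. Below is a minimal C++ sketch of that rule, checked against the new assembler tests; `vdataDwords` and its parameter names are illustrative, not LLVM API, and it assumes a target with packed D16 registers (as GFX10/GFX11 have).

```cpp
#include <algorithm>
#include <bit> // std::popcount, C++20

// Derive the number of VData dwords implied by a MIMG instruction's fields.
// Selecting the _V1.._V5 pseudo is then just a lookup by this count.
unsigned vdataDwords(unsigned dmask, bool tfe, bool d16, bool isGather4,
                     bool hasPackedD16) {
  // Gather4 always returns four components; other ops (including the image
  // atomics above) carry one dword per dmask bit, which is why cmpswap with
  // dmask=0x3 uses _V2 and dmask=0xf uses _V4.
  unsigned dwords = isGather4 ? 4 : std::max(1, std::popcount(dmask));
  if (d16 && hasPackedD16)
    dwords = (dwords + 1) / 2; // two 16-bit components packed per dword
  if (tfe)
    ++dwords;                  // one extra dword for the TFE status write
  return dwords;
}
```

Checking against the new cases: cmpswap with dmask:0x3 tfe gives 2 + 1 = 3 dwords (v[0:2], the new _V3 atomic variant); dmask:0xf tfe gives 5 (v[0:4], _V5); gather4 d16 gives 2 (v[64:65]) and gather4 tfe d16 gives 3 (v[64:66], the new _V3 sampler variant).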