Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -799,7 +799,7 @@
 def VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
                         (add VGPR_64)>;
 def VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-def VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, i128], (add VGPR_128)>;
+def VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64], (add VGPR_128)>;
 def VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
 def VReg_192 : VRegClass<6, [untyped], (add VGPR_192)>;
 def VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64], (add VGPR_256)>;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -384,24 +384,24 @@

     ; GFX7-LABEL: name: load_flat_s128
     ; GFX7: liveins: $vgpr0_vgpr1
-    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
-    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX8-LABEL: name: load_flat_s128
     ; GFX8: liveins: $vgpr0_vgpr1
-    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX9-LABEL: name: load_flat_s128
     ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX10-LABEL: name: load_flat_s128
     ; GFX10: liveins: $vgpr0_vgpr1
-    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -452,29 +452,29 @@
     ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX7-LABEL: name: load_global_s128
     ; GFX7: liveins: $vgpr0_vgpr1
-    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
-    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX7-FLAT-LABEL: name: load_global_s128
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1
-    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
-    ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX8-LABEL: name: load_global_s128
     ; GFX8: liveins: $vgpr0_vgpr1
-    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
-    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX9-LABEL: name: load_global_s128
     ; GFX9: liveins: $vgpr0_vgpr1
-    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX10-LABEL: name: load_global_s128
     ; GFX10: liveins: $vgpr0_vgpr1
-    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
-    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
@@ -191,15 +191,15 @@

     ; GFX7-LABEL: name: load_local_s128
     ; GFX7: liveins: $vgpr0
-    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
     ; GFX7: $m0 = S_MOV_B32 -1
-    ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 16, align 8, addrspace 3)
-    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; GFX9-LABEL: name: load_local_s128
     ; GFX9: liveins: $vgpr0
-    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 16, align 8, addrspace 3)
-    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -198,24 +198,24 @@

     ; GFX7-LABEL: name: store_flat_s128
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16)
     ; GFX8-LABEL: name: store_flat_s128
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16)
     ; GFX9-LABEL: name: store_flat_s128
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16)
     ; GFX10-LABEL: name: store_flat_s128
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -239,29 +239,29 @@
     ; GFX6: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     ; GFX7-LABEL: name: store_global_s128
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     ; GFX7-FLAT-LABEL: name: store_global_s128
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+    ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-FLAT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     ; GFX8-LABEL: name: store_global_s128
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     ; GFX9-LABEL: name: store_global_s128
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     ; GFX10-LABEL: name: store_global_s128
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
Index: llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -stop-after=finalize-isel -o - %s | FileCheck %s
+
+; Make sure we only use one 128-bit register instead of 2 for i128 asm
+; constraints
+
+define amdgpu_kernel void @s_input_output_i128() {
+  ; CHECK-LABEL: name: s_input_output_i128
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:SGPR_128 */, def %4
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
+  ; CHECK: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3997705 /* reguse:SGPR_128 */, [[COPY]]
+  ; CHECK: S_ENDPGM 0
+  %val = tail call i128 asm sideeffect "; def $0", "=s"()
+  call void asm sideeffect "; use $0", "s"(i128 %val)
+  ret void
+}
+
+define amdgpu_kernel void @v_input_output_i128() {
+  ; CHECK-LABEL: name: v_input_output_i128
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_128 */, def %4
+  ; CHECK: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
+  ; CHECK: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_128 */, [[COPY]]
+  ; CHECK: S_ENDPGM 0
+  %val = tail call i128 asm sideeffect "; def $0", "=v"()
+  call void asm sideeffect "; use $0", "v"(i128 %val)
+  ret void
+}
+
+define amdgpu_kernel void @a_input_output_i128() {
+  ; CHECK-LABEL: name: a_input_output_i128
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3801098 /* regdef:AReg_128 */, def %4
+  ; CHECK: [[COPY:%[0-9]+]]:areg_128 = COPY %4
+  ; CHECK: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3801097 /* reguse:AReg_128 */, [[COPY]]
+  ; CHECK: S_ENDPGM 0
+  %val = call i128 asm sideeffect "; def $0", "=a"()
+  call void asm sideeffect "; use $0", "a"(i128 %val)
+  ret void
+}