Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1008,6 +1008,12 @@
     return HasLdsBranchVmemWARHazard;
   }
 
+  // Shift amount of a 64 bit shift cannot be a highest allocated register
+  // if also at the end of the allocation block.
+  bool hasShift64HighRegBug() const {
+    return GFX90AInsts && !GFX940Insts;
+  }
+
   // Has one cycle hazard on transcendental instruction feeding a
   // non transcendental VALU.
   bool hasTransForwardingHazard() const { return GFX940Insts; }
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12446,6 +12446,29 @@
     }
   }
 
+  if (ST.hasShift64HighRegBug()) {
+    for (auto &MBB : MF) {
+      for (auto &MI : MBB) {
+        switch (MI.getOpcode()) {
+        default:
+          continue;
+        case AMDGPU::V_LSHLREV_B64_e64:
+        case AMDGPU::V_LSHRREV_B64_e64:
+        case AMDGPU::V_ASHRREV_I64_e64:
+          break;
+        }
+
+        MachineOperand *Amt = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+        if (!Amt->isReg())
+          continue;
+        Register AmtReg = Amt->getReg();
+        if (!TRI->isVGPR(MRI, AmtReg))
+          continue;
+        MRI.constrainRegClass(AmtReg, &AMDGPU::VReg_32_Dec8RegClass);
+      }
+    }
+  }
+
   TargetLoweringBase::finalizeLowering(MF);
 }
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -440,7 +440,8 @@
   // RegBankSelect in the GISel flow. The aligned regclasses are not fully given
   // until Instruction selection.
   if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
-    if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
+    if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass ||
+        RC == &AMDGPU::VReg_32_Dec8RegClass)
       return &AMDGPU::AV_32RegClass;
     if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
       return &AMDGPU::AV_64RegClass;
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -836,6 +836,11 @@
 defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
 defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
 
+def VReg_32_Dec8 : VRegClassBase<1, VGPR_32.RegTypes,
+                                 (sub VGPR_32, (sequence "VGPR%u", 7, 255, 8))> {
+  let HasVGPR = 1;
+}
+
 multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
   let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
     // Define the regular class.
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1993,6 +1993,7 @@
   case AMDGPU::SReg_32RegClassID:
   case AMDGPU::SReg_32_XM0RegClassID:
   case AMDGPU::SRegOrLds_32RegClassID:
+  case AMDGPU::VReg_32_Dec8RegClassID:
     return 32;
   case AMDGPU::SGPR_64RegClassID:
   case AMDGPU::VS_64RegClassID:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -91,7 +91,7 @@
 define i32 @test_single_sgpr_output_s32() nounwind {
   ; CHECK-LABEL: name: test_single_sgpr_output_s32
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2031626 /* regdef:SReg_32 */, def %0
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -121,7 +121,7 @@
 define double @test_multiple_register_outputs_mixed() #0 {
   ; CHECK-LABEL: name: test_multiple_register_outputs_mixed
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 3014666 /* regdef:VReg_64 */, def %1
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
@@ -164,7 +164,7 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1966089 /* reguse:SReg_32 */, [[COPY]]
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 2031625 /* reguse:SReg_32 */, [[COPY]]
   ; CHECK-NEXT: S_ENDPGM 0
   call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42)
   ret void
@@ -232,13 +232,13 @@
 define i32 @test_sgpr_matching_constraint() nounwind {
   ; CHECK-LABEL: name: test_sgpr_matching_constraint
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2031626 /* regdef:SReg_32 */, def %0
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 2031626 /* regdef:SReg_32 */, def %2
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
+  ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 2031626 /* regdef:SReg_32 */, def %4, 2031625 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4
   ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
   ; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -282,7 +282,7 @@
 define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
   ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 2031626 /* regdef:SReg_32 */, def %0
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
   ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
Index: llvm/test/CodeGen/AMDGPU/hazard-shift64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/hazard-shift64.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX940 %s
+
+; GCN-LABEL: {{^}}highest_reg_lshr_amt:
+; GFX90A: v_lshrrev_b64 v[0:1], v8, v[0:1]
+; GFX940: v_lshrrev_b64 v[0:1], v7, v[0:1]
+define i64 @highest_reg_lshr_amt(i64 %x, i64* %ptr) {
+  %amt = load i64, i64* %ptr
+  call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6}" ()
+  %v = lshr i64 %x, %amt
+  ret i64 %v
+}
+
+; GCN-LABEL: {{^}}highest_sreg_lshr_amt:
+; GCN: v_lshrrev_b64 v[0:1], s0, v[0:1]
+define amdgpu_ps void @highest_sreg_lshr_amt(i64 %x, i64 inreg %amt) {
+  %v = lshr i64 %x, %amt
+  store i64 %v, i64* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}highest_imm_lshr_amt:
+; GCN: v_lshrrev_b64 v[0:1], 4, v[0:1]
+define void @highest_imm_lshr_amt(i64 %x) {
+  %v = lshr i64 %x, 4
+  store i64 %v, i64* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}highest_reg_ashr_amt:
+; GFX90A: v_ashrrev_i64 v[0:1], v8, v[0:1]
+; GFX940: v_ashrrev_i64 v[0:1], v7, v[0:1]
+define i64 @highest_reg_ashr_amt(i64 %x, i64* %ptr) {
+  %amt = load i64, i64* %ptr
+  call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6}" ()
+  %v = ashr i64 %x, %amt
+  ret i64 %v
+}
+
+; GCN-LABEL: {{^}}highest_reg_shl_amt:
+; GFX90A: v_lshlrev_b64 v[0:1], v8, v[0:1]
+; GFX940: v_lshlrev_b64 v[0:1], v7, v[0:1]
+define i64 @highest_reg_shl_amt(i64 %x, i64* %ptr) {
+  %amt = load i64, i64* %ptr
+  call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6}" ()
+  %v = shl i64 %x, %amt
+  ret i64 %v
+}
Index: llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,15 +8,15 @@
 define amdgpu_kernel void @s_input_output_i128() {
   ; GFX908-LABEL: name: s_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7667722 /* regdef:SGPR_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7667721 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: s_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7667722 /* regdef:SGPR_128 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7667721 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=s"()
   call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@
 define amdgpu_kernel void @v_input_output_i128() {
   ; GFX908-LABEL: name: v_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: v_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7012362 /* regdef:VReg_128_Align2 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7012361 /* reguse:VReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=v"()
   call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@
 define amdgpu_kernel void @a_input_output_i128() {
   ; GFX908-LABEL: name: a_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:AReg_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: a_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6946826 /* regdef:AReg_128_Align2 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6946825 /* reguse:AReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = call i128 asm sideeffect "; def $0", "=a"()
   call void asm sideeffect "; use $0", "a"(i128 %val)
Index: llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -7,7 +7,7 @@
 define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; REGALLOC-GFX908-LABEL: name: partial_copy
   ; REGALLOC-GFX908: bb.0 (%ir-block.0):
-  ; REGALLOC-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def [[VREG_64:%[0-9]+]]
+  ; REGALLOC-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3014666 /* regdef:VReg_64 */, def [[VREG_64:%[0-9]+]]
   ; REGALLOC-GFX908: SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
   ; REGALLOC-GFX908: [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64
   ; REGALLOC-GFX908: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0
@@ -17,7 +17,7 @@
   ;
   ; PEI-GFX908-LABEL: name: partial_copy
   ; PEI-GFX908: bb.0 (%ir-block.0):
-  ; PEI-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
+  ; PEI-GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3014666 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
   ; PEI-GFX908: BUFFER_STORE_DWORD_OFFSET killed $vgpr0
   ; PEI-GFX908: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
   ; PEI-GFX908: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
@@ -29,7 +29,7 @@
   ;
   ; REGALLOC-GFX90A-LABEL: name: partial_copy
   ; REGALLOC-GFX90A: bb.0 (%ir-block.0):
-  ; REGALLOC-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def [[VREG_64:%[0-9]+]]
+  ; REGALLOC-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3342346 /* regdef:VReg_64_Align2 */, def [[VREG_64:%[0-9]+]]
   ; REGALLOC-GFX90A: SI_SPILL_V64_SAVE [[VREG_64]], %stack.0
   ; REGALLOC-GFX90A: [[V_MFMA_I32_4X4X4I8_A128:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64
   ; REGALLOC-GFX90A: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0
@@ -38,7 +38,7 @@
   ;
   ; PEI-GFX90A-LABEL: name: partial_copy
   ; PEI-GFX90A: bb.0 (%ir-block.0):
-  ; PEI-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
+  ; PEI-GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3342346 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
   ; PEI-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0
   ; PEI-GFX90A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1
   ; PEI-GFX90A: renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64
Index: llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -19,7 +19,7 @@
   ; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
   ; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
   ; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0
-  ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2949129 /* reguse:VReg_64 */, %23
+  ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3014665 /* reguse:VReg_64 */, %23
   ; GCN-NEXT: S_ENDPGM 0
   %v0 = call i32 asm sideeffect "; def $0", "=v"()
   %tmp = insertelement <2 x i32> undef, i32 %v0, i32 0