Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -94,6 +94,7 @@
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_MERGE_VALUES(MachineInstr &I) const;
   bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
+  bool selectG_BUILD_VECTOR_TRUNC(MachineInstr &I) const;
   bool selectG_PTR_ADD(MachineInstr &I) const;
   bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
   bool selectG_INSERT(MachineInstr &I) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -595,6 +595,114 @@
   return true;
 }
 
+/// Return true if \p Reg matches an integer constant (via m_ICst) equal to 0.
+static bool isZero(Register Reg, const MachineRegisterInfo &MRI) {
+  int64_t Val;
+  return mi_match(Reg, MRI, m_ICst(Val)) && Val == 0;
+}
+
+/// Select G_BUILD_VECTOR_TRUNC.
+///
+/// selectImpl() is tried first. The manual path after it only handles a
+/// <2 x s16> result on the SGPR bank whose first source is s32, and returns
+/// false for anything else. In order of precedence it emits:
+///   - a COPY of $src0 when getDefIgnoringCopies($src1) is a G_IMPLICIT_DEF,
+///   - S_PACK_HH_B32_B16 when both sources are single-use (lshr x, 16),
+///   - S_PACK_LH_B32_B16 when only $src1 is a single-use (lshr x, 16),
+///   - S_LSHR_B32 x, 16 when $src0 is a single-use (lshr x, 16) and $src1
+///     is the constant 0,
+///   - S_PACK_LL_B32_B16 on the unmodified sources otherwise.
+///
+/// \returns true if selectImpl() succeeded; otherwise the result of
+/// constraining the selected instruction's register operands.
+bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
+  MachineInstr &MI) const {
+  if (selectImpl(MI, *CoverageInfo))
+    return true;
+
+  const LLT S32 = LLT::scalar(32);
+  const LLT V2S16 = LLT::vector(2, 16);
+
+  Register Dst = MI.getOperand(0).getReg();
+  if (MRI->getType(Dst) != V2S16)
+    return false;
+
+  // NOTE(review): DstBank is dereferenced without a null check; presumably
+  // every register has a bank by the time selection runs -- confirm.
+  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
+  if (DstBank->getID() != AMDGPU::SGPRRegBankID)
+    return false;
+
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+  // Only Src0's type is checked. NOTE(review): presumably Src1 always has the
+  // same type as Src0 for this opcode -- confirm.
+  if (MRI->getType(Src0) != S32)
+    return false;
+
+  const DebugLoc &DL = MI.getDebugLoc();
+  MachineBasicBlock *BB = MI.getParent();
+
+  // TODO: This should probably be a combine somewhere
+  // (build_vector_trunc $src0, undef) -> (copy $src0)
+  MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
+  if (Src1Def && Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
+    // Mutate MI in place: switch the opcode to COPY and drop the undef source.
+    MI.setDesc(TII.get(AMDGPU::COPY));
+    MI.RemoveOperand(2);
+    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) &&
+           RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI);
+  }
+
+  Register ShiftSrc0;
+  Register ShiftSrc1;
+  int64_t ShiftAmt;
+
+  // With multiple uses of the shift, this will duplicate the shift and
+  // increase register pressure.
+  //
+  // (build_vector_trunc (lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16))
+  //  => (S_PACK_HH_B32_B16 $src0, $src1)
+  // (build_vector_trunc $src0, (lshr_oneuse SReg_32:$src1, 16))
+  //  => (S_PACK_LH_B32_B16 $src0, $src1)
+  // (build_vector_trunc $src0, $src1)
+  //  => (S_PACK_LL_B32_B16 $src0, $src1)
+
+  // FIXME: This is an inconvenient way to check a specific value
+  // ShiftAmt is shared by both matches; each is compared against 16 right
+  // after its own match, before the next match can overwrite it.
+  bool Shift0 = mi_match(
+    Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_ICst(ShiftAmt)))) &&
+    ShiftAmt == 16;
+
+  bool Shift1 = mi_match(
+    Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_ICst(ShiftAmt)))) &&
+    ShiftAmt == 16;
+
+  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
+  if (Shift0 && Shift1) {
+    Opc = AMDGPU::S_PACK_HH_B32_B16;
+    MI.getOperand(1).setReg(ShiftSrc0);
+    MI.getOperand(2).setReg(ShiftSrc1);
+  } else if (Shift1) {
+    Opc = AMDGPU::S_PACK_LH_B32_B16;
+    MI.getOperand(2).setReg(ShiftSrc1);
+  } else if (Shift0 && isZero(Src1, *MRI)) {
+    // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
+    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
+      .addReg(ShiftSrc0)
+      .addImm(16);
+
+    // MI is replaced by the new S_LSHR_B32 rather than mutated in place.
+    MI.eraseFromParent();
+    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+  }
+
+  // Remaining cases mutate MI in place into the chosen S_PACK_* opcode.
+  MI.setDesc(TII.get(Opc));
+  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
+
 bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
   return selectG_ADD_SUB(I);
 }
@@ -2062,6 +2170,8 @@
     return selectG_MERGE_VALUES(I);
   case TargetOpcode::G_UNMERGE_VALUES:
     return selectG_UNMERGE_VALUES(I);
+  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
+    return selectG_BUILD_VECTOR_TRUNC(I);
   case TargetOpcode::G_PTR_ADD:
     return selectG_PTR_ADD(I);
   case
TargetOpcode::G_IMPLICIT_DEF: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -1,5 +1,4 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# XFAIL: * # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- @@ -20,8 +19,8 @@ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 - S_ENDPGM 0, implicit %4 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 ... --- @@ -42,10 +41,8 @@ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 S_ENDPGM 0, implicit %4 ... @@ -71,10 +68,8 @@ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 %3:sgpr(s32) = G_LSHR %1, %2 - %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0 S_ENDPGM 0, implicit %4 ... @@ -97,11 +92,9 @@ ; GFX9: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - %2:sgpr(s32) = G_CONSTANT i32 16 %3:sgpr(s32) = G_LSHR %0, %2 %4:sgpr(s32) = G_LSHR %1, %2 - %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 S_ENDPGM 0, implicit %5 ... 
@@ -142,40 +135,81 @@ ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32 ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_CONSTANT i32 0 - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_v_v2s16_v_s32_s_undef_s32 +name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s32_s_undef_s32 - ; GFX9: liveins: $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: S_ENDPGM 0, implicit [[COPY]] - %0:vgpr(s32) = COPY $vgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_IMPLICIT_DEF + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_IMPLICIT_DEF + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_build_vector_trunc_s_v2s16_s_undef_s_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr1 - %2:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s_s32 + ; GFX9: liveins: $sgpr1 + ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[COPY]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = G_IMPLICIT_DEF + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 +name: test_build_vector_trunc_s_v2s16_s_s32_undef legalized: true regBankSelected: true tracksRegLiveness: true @@ -184,20 +218,40 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_undef ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: S_ENDPGM 0, implicit [[COPY]] %0:sgpr(s32) = COPY $sgpr0 - %1:sgpr(s32) = G_IMPLICIT_DEF + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_build_vector_trunc_s_v2s16_s_zero_s_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_zero_s_s32 + ; GFX9: liveins: $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[COPY]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = G_CONSTANT i32 0 + %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 S_ENDPGM 0, implicit %2 ... --- -name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 +name: test_build_vector_trunc_s_v2s16_s_s32_zero legalized: true regBankSelected: true tracksRegLiveness: true @@ -206,15 +260,173 @@ bb.0: liveins: $sgpr0 - ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32 + ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_zero ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CONSTANT i32 0 + %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_ENDPGM 0, implicit %2 +... 
- %1:sgpr(s32) = G_IMPLICIT_DEF +--- +name: test_build_vector_trunc_lshr16_zero +legalized: true +regBankSelected: true +tracksRegLiveness: true - %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0 - S_ENDPGM 0, implicit %2 +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_trunc_lshr16_zero + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + %0:sgpr(s32) = G_CONSTANT i32 0 + %1:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %1, %2 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0 + S_ENDPGM 0, implicit %4 +... + +# Don't use pack since it would duplicate the shift use +--- +name: test_build_vector_trunc_s_pack_lh_multi_use +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh_multi_use + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %1, %2 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 + S_ENDPGM 0, implicit %4, implicit %3 +... 
+ +--- +name: test_build_vector_trunc_s_pack_hh_multi_use_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_lhs + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[S_LSHR_B32_]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]], implicit [[S_LSHR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %0, %2 + %4:sgpr(s32) = G_LSHR %1, %2 + %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 + S_ENDPGM 0, implicit %5, implicit %3 +... + +--- +name: test_build_vector_trunc_s_pack_hh_multi_use_rhs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_1]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %0, %2 + %4:sgpr(s32) = G_LSHR %1, %2 + %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 + S_ENDPGM 0, 
implicit %5, implicit %4 +... + +--- +name: test_build_vector_trunc_s_pack_lh_wrong_shift_amt +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh_wrong_shift_amt + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_LSHR_B32_]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_CONSTANT i32 15 + %3:sgpr(s32) = G_LSHR %1, %2 + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: test_build_vector_trunc_s_pack_hh_wrong_shift_amt +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh_wrong_shift_amt + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 15 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_CONSTANT i32 15 + %3:sgpr(s32) = G_LSHR %0, %2 + %4:sgpr(s32) = G_LSHR %1, %2 + %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4 + S_ENDPGM 0, implicit %5 ...