Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -23,6 +23,9 @@
 std::tuple<Register, unsigned, MachineInstr *>
 getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
 
+/// \returns true if the two-element shuffle mask \p Mask has an undef (-1)
+/// element, or if both elements select from the same source vector.
+bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
 
 }
 }
Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -43,3 +43,14 @@
 
   return std::make_tuple(Reg, 0, Def);
 }
+
+bool AMDGPU::isLegalVOP3PShuffleMask(ArrayRef<int> Mask) {
+  assert(Mask.size() == 2);
+
+  // If one half is undef, the other is trivially in the same reg.
+  if (Mask[0] == -1 || Mask[1] == -1)
+    return true;
+  // Indices 0/1 have bit 1 clear and 2/3 have it set, so matching bits mean
+  // both halves read the same source register.
+  return (Mask[0] & 2) == (Mask[1] & 2);
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,6 +125,7 @@
   bool selectG_PTR_MASK(MachineInstr &I) const;
   bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
   bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
+  bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
 
   std::pair<Register, unsigned>
   selectVOP3ModsImpl(MachineOperand &Root) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2100,6 +2100,174 @@
   return true;
 }
 
+/// \returns true if mask element \p X is 0 or undef (-1).
+static bool isZeroOrUndef(int X) {
+  return X == 0 || X == -1;
+}
+
+/// \returns true if mask element \p X is 1 or undef (-1).
+static bool isOneOrUndef(int X) {
+  return X == 1 || X == -1;
+}
+
+/// \returns true if mask element \p X is 0, 1, or undef (-1).
+static bool isZeroOrOneOrUndef(int X) {
+  return X == 0 || X == 1 || X == -1;
+}
+
+/// Normalize a VOP3P shuffle mask to refer to the low/high half of a single
+/// 32-bit register.
+///
+/// \p Mask must be legal per AMDGPU::isLegalVOP3PShuffleMask. \p NewMask
+/// receives the mask with every defined element rebased to 0 or 1; undef (-1)
+/// elements are kept.
+///
+/// \returns \p Src0 if no element is 2 or 3, otherwise \p Src1.
+static Register normalizeVOP3PMask(int NewMask[2], Register Src0, Register Src1,
+                                   ArrayRef<int> Mask) {
+  NewMask[0] = Mask[0];
+  NewMask[1] = Mask[1];
+  if (isZeroOrOneOrUndef(Mask[0]) && isZeroOrOneOrUndef(Mask[1]))
+    return Src0;
+
+  assert(NewMask[0] == 2 || NewMask[0] == 3 || NewMask[0] == -1);
+  assert(NewMask[1] == 2 || NewMask[1] == 3 || NewMask[1] == -1);
+
+  // Shift the mask inputs to be 0/1;
+  NewMask[0] = NewMask[0] == -1 ? -1 : NewMask[0] - 2;
+  NewMask[1] = NewMask[1] == -1 ? -1 : NewMask[1] - 2;
+  return Src1;
+}
+
+/// Select a <2 x s16> G_SHUFFLE_VECTOR whose mask reads only one source.
+///
+/// This is only legal with VOP3P instructions as an aid to op_sel matching.
+///
+/// \returns false without modifying \p MI if the types or mask are not
+/// supported or a register cannot be constrained to the chosen class.
+bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
+  MachineInstr &MI) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src0Reg = MI.getOperand(1).getReg();
+  Register Src1Reg = MI.getOperand(2).getReg();
+  ArrayRef<int> ShufMask = MI.getOperand(3).getShuffleMask();
+
+  const LLT V2S16 = LLT::vector(2, 16);
+  if (MRI->getType(DstReg) != V2S16 || MRI->getType(Src0Reg) != V2S16)
+    return false;
+
+  if (!AMDGPU::isLegalVOP3PShuffleMask(ShufMask))
+    return false;
+
+  assert(ShufMask.size() == 2);
+  assert(STI.hasSDWA() && "no target has VOP3P but not SDWA");
+
+  MachineBasicBlock *MBB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
+  const TargetRegisterClass &RC = IsVALU ?
+    AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
+
+  // Handle the degenerate case which should have folded out. Constrain the
+  // destination before rewriting so that a failure leaves MI untouched.
+  if (ShufMask[0] == -1 && ShufMask[1] == -1) {
+    if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
+      return false;
+
+    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), DstReg);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // A legal VOP3P mask only reads one of the sources.
+  int Mask[2];
+  Register SrcVec = normalizeVOP3PMask(Mask, Src0Reg, Src1Reg, ShufMask);
+
+  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI) ||
+      !RBI.constrainGenericRegister(SrcVec, RC, *MRI))
+    return false;
+
+  // TODO: This also should have been folded out
+  if (isZeroOrUndef(Mask[0]) && isOneOrUndef(Mask[1])) {
+    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), DstReg)
+      .addReg(SrcVec);
+
+    MI.eraseFromParent();
+    return true;
+  }
+
+  if (Mask[0] == 1 && Mask[1] == -1) {
+    // Only the low result half is defined; shift the high source half down.
+    if (IsVALU) {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
+        .addImm(16)
+        .addReg(SrcVec);
+    } else {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
+        .addReg(SrcVec)
+        .addImm(16);
+    }
+  } else if (isZeroOrUndef(Mask[0]) && Mask[1] == 0) {
+    if (IsVALU) {
+      // Write low half of the register into the high half.
+      MachineInstr *MovSDWA =
+        BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+        .addImm(0)                             // $src0_modifiers
+        .addReg(SrcVec)                        // $src0
+        .addImm(0)                             // $clamp
+        .addImm(AMDGPU::SDWA::WORD_1)          // $dst_sel
+        .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+        .addImm(AMDGPU::SDWA::WORD_0)          // $src0_sel
+        .addReg(SrcVec, RegState::Implicit);
+      MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+    } else {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
+        .addReg(SrcVec)
+        .addReg(SrcVec);
+    }
+  } else if (Mask[0] == 1 && Mask[1] == 1) {
+    if (IsVALU) {
+      // Write high half of the register into the low half.
+      MachineInstr *MovSDWA =
+        BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+        .addImm(0)                             // $src0_modifiers
+        .addReg(SrcVec)                        // $src0
+        .addImm(0)                             // $clamp
+        .addImm(AMDGPU::SDWA::WORD_0)          // $dst_sel
+        .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+        .addImm(AMDGPU::SDWA::WORD_1)          // $src0_sel
+        .addReg(SrcVec, RegState::Implicit);
+      MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+    } else {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg)
+        .addReg(SrcVec)
+        .addReg(SrcVec);
+    }
+  } else if (Mask[0] == 1 && Mask[1] == 0) {
+    // Swap the two halves.
+    if (IsVALU) {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32), DstReg)
+        .addReg(SrcVec)
+        .addReg(SrcVec)
+        .addImm(16);
+    } else {
+      Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg)
+        .addReg(SrcVec)
+        .addImm(16);
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
+        .addReg(TmpReg)
+        .addReg(SrcVec);
+    }
+  } else
+    llvm_unreachable("all shuffle masks should be handled");
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
   if (I.isPHI())
     return selectPHI(I);
@@ -2202,6 +2370,8 @@
     return selectG_EXTRACT_VECTOR_ELT(I);
   case TargetOpcode::G_INSERT_VECTOR_ELT:
     return selectG_INSERT_VECTOR_ELT(I);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return selectG_SHUFFLE_VECTOR(I);
   case AMDGPU::G_AMDGPU_ATOMIC_INC:
   case AMDGPU::G_AMDGPU_ATOMIC_DEC:
     initM0(I);
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1816,16 +1816,6 @@
   return true;
 }
 
-static bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask) {
-  assert(Mask.size() == 2);
-
-  // If one half is undef, the other is trivially in the same reg.
-  if (Mask[0] == -1 || Mask[1] == -1)
-    return true;
-  return ((Mask[0] == 0 || Mask[0] == 1) && (Mask[1] == 0 || Mask[1] == 1)) ||
-         ((Mask[0] == 2 || Mask[0] == 3) && (Mask[1] == 2 || Mask[1] == 3));
-}
-
 bool AMDGPULegalizerInfo::legalizeShuffleVector(
   MachineInstr &MI, MachineRegisterInfo &MRI,
   MachineIRBuilder &B) const {
@@ -1837,7 +1827,7 @@
   LLT SrcTy = MRI.getType(Src0);
 
   if (SrcTy == V2S16 && DstTy == V2S16 &&
-      isLegalVOP3PShuffleMask(MI.getOperand(3).getShuffleMask()))
+      AMDGPU::isLegalVOP3PShuffleMask(MI.getOperand(3).getShuffleMask()))
     return true;
 
   MachineIRBuilder HelperBuilder(MI);
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
@@ -0,0 +1,740 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+name: v_shufflevector_v2s16_v2s16_u_u
+tracksRegLiveness: true
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_u
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GFX9: $vgpr0 = COPY [[DEF]]
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef)
+    $vgpr0 = COPY %2
+
+...
+ +--- +name: v_shufflevector_v2s16_v2s16_0_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_u + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_u_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_1_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_u + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX9: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) + $vgpr0 = COPY %2 + +... 
+ +--- +name: v_shufflevector_v2s16_v2s16_u_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) + $vgpr0 = COPY %2 + +... + + +--- +name: v_shufflevector_v2s16_v2s16_2_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_u + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_u_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) + $vgpr0 = COPY %2 + +... 
+ +--- +name: v_shufflevector_v2s16_v2s16_3_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_u + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX9: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_u_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_0_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_0 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) + $vgpr0 = COPY %2 + +... 
+ +--- +name: v_shufflevector_v2s16_v2s16_0_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_1_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_0 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_ALIGNBIT_B32_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32 [[COPY]], [[COPY]], 16, implicit $exec + ; GFX9: $vgpr0 = COPY [[V_ALIGNBIT_B32_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_1_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) + $vgpr0 = COPY %2 + +... 
+ +--- +name: v_shufflevector_v2s16_v2s16_2_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_2_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: $vgpr0 = COPY [[COPY]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) + $vgpr0 = COPY %2 + +... + +--- +name: v_shufflevector_v2s16_v2s16_3_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ALIGNBIT_B32_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32 [[COPY]], [[COPY]], 16, implicit $exec + ; GFX9: $vgpr0 = COPY [[V_ALIGNBIT_B32_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) + $vgpr0 = COPY %2 + +... 
+ +--- +name: v_shufflevector_v2s16_v2s16_3_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) + ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) + $vgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_u_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_u + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX9: $sgpr0 = COPY [[DEF]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_0_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_u + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) + $sgpr0 = COPY %2 + +... 
+ +--- +name: s_shufflevector_v2s16_v2s16_u_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_1_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_u + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: $sgpr0 = COPY [[S_LSHR_B32_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_u_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_1 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) + $sgpr0 = COPY %2 + +... 
+ + +--- +name: s_shufflevector_v2s16_v2s16_2_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_u + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_u_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_3_u +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_u + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: $sgpr0 = COPY [[S_LSHR_B32_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) + $sgpr0 = COPY %2 + +... 
+ +--- +name: s_shufflevector_v2s16_v2s16_u_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_3 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_0_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_0 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_0_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_1 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) + $sgpr0 = COPY %2 + +... 
+ +--- +name: s_shufflevector_v2s16_v2s16_1_0 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_0 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_1_1 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_1 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_2_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_2 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) + $sgpr0 = COPY %2 + +... 
+ +--- +name: s_shufflevector_v2s16_v2s16_2_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_3 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: $sgpr0 = COPY [[COPY]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_3_2 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_2 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) + $sgpr0 = COPY %2 + +... + +--- +name: s_shufflevector_v2s16_v2s16_3_3 +tracksRegLiveness: true +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_3 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] + ; GFX9: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) + $sgpr0 = COPY %2 + +...