diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -139,7 +139,6 @@ bool selectG_PTRMASK(MachineInstr &I) const; bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const; bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const; - bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const; bool selectBufferLoadLds(MachineInstr &MI) const; bool selectGlobalLoadLds(MachineInstr &MI) const; bool selectBVHIntrinsic(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2890,192 +2890,6 @@ return true; } -static bool isZeroOrUndef(int X) { - return X == 0 || X == -1; -} - -static bool isOneOrUndef(int X) { - return X == 1 || X == -1; -} - -static bool isZeroOrOneOrUndef(int X) { - return X == 0 || X == 1 || X == -1; -} - -// Normalize a VOP3P shuffle mask to refer to the low/high half of a single -// 32-bit register. -static Register normalizeVOP3PMask(int NewMask[2], Register Src0, Register Src1, - ArrayRef Mask) { - NewMask[0] = Mask[0]; - NewMask[1] = Mask[1]; - if (isZeroOrOneOrUndef(Mask[0]) && isZeroOrOneOrUndef(Mask[1])) - return Src0; - - assert(NewMask[0] == 2 || NewMask[0] == 3 || NewMask[0] == -1); - assert(NewMask[1] == 2 || NewMask[1] == 3 || NewMask[1] == -1); - - // Shift the mask inputs to be 0/1; - NewMask[0] = NewMask[0] == -1 ? -1 : NewMask[0] - 2; - NewMask[1] = NewMask[1] == -1 ? -1 : NewMask[1] - 2; - return Src1; -} - -// This is only legal with VOP3P instructions as an aid to op_sel matching. -bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR( - MachineInstr &MI) const { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(1).getReg(); - Register Src1Reg = MI.getOperand(2).getReg(); - ArrayRef ShufMask = MI.getOperand(3).getShuffleMask(); - - const LLT V2S16 = LLT::fixed_vector(2, 16); - if (MRI->getType(DstReg) != V2S16 || MRI->getType(Src0Reg) != V2S16) - return false; - - if (!AMDGPU::isLegalVOP3PShuffleMask(ShufMask)) - return false; - - assert(ShufMask.size() == 2); - - MachineBasicBlock *MBB = MI.getParent(); - const DebugLoc &DL = MI.getDebugLoc(); - - const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); - const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID; - const TargetRegisterClass &RC = IsVALU ? - AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; - - // Handle the degenerate case which should have folded out. - if (ShufMask[0] == -1 && ShufMask[1] == -1) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), DstReg); - - MI.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, RC, *MRI); - } - - // A legal VOP3P mask only reads one of the sources. - int Mask[2]; - Register SrcVec = normalizeVOP3PMask(Mask, Src0Reg, Src1Reg, ShufMask); - - if (!RBI.constrainGenericRegister(DstReg, RC, *MRI) || - !RBI.constrainGenericRegister(SrcVec, RC, *MRI)) - return false; - - // TODO: This also should have been folded out - if (isZeroOrUndef(Mask[0]) && isOneOrUndef(Mask[1])) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), DstReg) - .addReg(SrcVec); - - MI.eraseFromParent(); - return true; - } - - if (Mask[0] == 1 && Mask[1] == -1) { - if (IsVALU) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg) - .addImm(16) - .addReg(SrcVec); - } else { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg) - .addReg(SrcVec) - .addImm(16); - } - } else if (Mask[0] == -1 && Mask[1] == 0) { - if (IsVALU) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg) - .addImm(16) - .addReg(SrcVec); - } else { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg) - .addReg(SrcVec) - .addImm(16); - } - } else if (Mask[0] == 0 && Mask[1] == 0) { - if (IsVALU) { - if (STI.hasSDWA()) { - // Write low half of the register into the high half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); - } else { - Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg) - .addImm(0xFFFF) - .addReg(SrcVec); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) - .addReg(TmpReg) - .addImm(16) - .addReg(TmpReg); - } - } else { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) - .addReg(SrcVec) - .addReg(SrcVec); - } - } else if (Mask[0] == 1 && Mask[1] == 1) { - if (IsVALU) { - if (STI.hasSDWA()) { - // Write high half of the register into the low half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); - } else { - Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) - .addImm(16) - .addReg(SrcVec); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) - .addReg(TmpReg) - .addImm(16) - .addReg(TmpReg); - } - } else { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg) - .addReg(SrcVec) - .addReg(SrcVec); - } - } else if (Mask[0] == 1 && Mask[1] == 0) { - if (IsVALU) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32_e64), DstReg) - .addReg(SrcVec) - .addReg(SrcVec) - .addImm(16); - } else { - if (STI.hasSPackHL()) { - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HL_B32_B16), DstReg) - .addReg(SrcVec) - .addReg(SrcVec); - } else { - Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg) - .addReg(SrcVec) - .addImm(16); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) - .addReg(TmpReg) - .addReg(SrcVec); - } - } - } else - llvm_unreachable("all shuffle masks should be handled"); - - MI.eraseFromParent(); - return true; -} - bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { unsigned Opc; unsigned Size = MI.getOperand(3).getImm(); @@ -3476,8 +3290,6 @@ return selectG_EXTRACT_VECTOR_ELT(I); case TargetOpcode::G_INSERT_VECTOR_ELT: return selectG_INSERT_VECTOR_ELT(I); - case TargetOpcode::G_SHUFFLE_VECTOR: - return selectG_SHUFFLE_VECTOR(I); case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16: case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -60,8 +60,6 @@ MachineIRBuilder &B) const; bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - bool legalizeShuffleVector(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const; bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1550,14 +1550,7 @@ .clampMaxNumElements(1, S16, 2) // TODO: Make 4? .clampMaxNumElements(0, S16, 64); - // TODO: Don't fully scalarize v2s16 pieces? Or combine out those - // pre-legalize. - if (ST.hasVOP3PInsts()) { - getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) - .customFor({V2S16, V2S16}) - .lower(); - } else - getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower(); + getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower(); // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { @@ -1764,8 +1757,6 @@ return legalizeExtractVectorElt(MI, MRI, B); case TargetOpcode::G_INSERT_VECTOR_ELT: return legalizeInsertVectorElt(MI, MRI, B); - case TargetOpcode::G_SHUFFLE_VECTOR: - return legalizeShuffleVector(MI, MRI, B); case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: return legalizeSinCos(MI, MRI, B); @@ -2404,26 +2395,6 @@ return true; } -bool AMDGPULegalizerInfo::legalizeShuffleVector( - MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - const LLT V2S16 = LLT::fixed_vector(2, 16); - - Register Dst = MI.getOperand(0).getReg(); - Register Src0 = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(Dst); - LLT SrcTy = MRI.getType(Src0); - - if (SrcTy == V2S16 && DstTy == V2S16 && - AMDGPU::isLegalVOP3PShuffleMask(MI.getOperand(3).getShuffleMask())) - return true; - - MachineIRBuilder HelperBuilder(MI); - GISelObserverWrapper DummyObserver; - LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder); - return Helper.lowerShuffleVector(MI) == LegalizerHelper::Legalized; -} - bool AMDGPULegalizerInfo::legalizeSinCos( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir +++ /dev/null @@ -1,970 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX11 %s - ---- -name: v_shufflevector_v2s16_v2s16_u_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_u - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX9-NEXT: $vgpr0 = COPY [[DEF]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_u - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr0 = COPY [[DEF]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_0_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_u - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_u - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_u_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_0 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_1_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_u - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_u - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_u_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_1 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_1 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) - $vgpr0 = COPY %2 - -... - - ---- -name: v_shufflevector_v2s16_v2s16_2_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_u - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_u - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_u_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_2 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_3_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_u - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_u - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_u_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_3 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_3 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_0_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_0 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) - ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_0 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_0_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_0_1 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_1 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_1_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_0 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_0 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_1_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_1_1 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) - ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_1 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_2_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_2 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) - ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_2 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_2_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_2_3 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: $vgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_3 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: $vgpr0 = COPY [[COPY]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_3_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_2 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec - ; GFX9-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_2 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) - $vgpr0 = COPY %2 - -... - ---- -name: v_shufflevector_v2s16_v2s16_3_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_3_3 - ; GFX9: liveins: $vgpr0, $vgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) - ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_sdwa]] - ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_3 - ; GFX11: liveins: $vgpr0, $vgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec - ; GFX11-NEXT: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec - ; GFX11-NEXT: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] - %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = COPY $vgpr1 - %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) - $vgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_u_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_u - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX9-NEXT: $sgpr0 = COPY [[DEF]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_u - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr0 = COPY [[DEF]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_0_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_u - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_u - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_u_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_0 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_1_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_u - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_u - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_u_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_1 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_1 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) - $sgpr0 = COPY %2 - -... - - ---- -name: s_shufflevector_v2s16_v2s16_2_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_u - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_u - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_u_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_2 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc - ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHL_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_3_u -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_u - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_u - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX11-NEXT: $sgpr0 = COPY [[S_LSHR_B32_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_u_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_3 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_3 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_0_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_0 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_0 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_0_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_0_1 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_1 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_1_0 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_0 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_0 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_1_1 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_1_1 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_1 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_2_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_2 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_2 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_2_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_2_3 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: $sgpr0 = COPY [[COPY]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_3 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_3_2 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_2 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc - ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_2 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) - $sgpr0 = COPY %2 - -... - ---- -name: s_shufflevector_v2s16_v2s16_3_3 -tracksRegLiveness: true -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - - ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_3_3 - ; GFX9: liveins: $sgpr0, $sgpr1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] - ; GFX9-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] - ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_3 - ; GFX11: liveins: $sgpr0, $sgpr1 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX11-NEXT: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] - ; GFX11-NEXT: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] - %0:sgpr(<2 x s16>) = COPY $sgpr0 - %1:sgpr(<2 x s16>) = COPY $sgpr1 - %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) - $sgpr0 = COPY %2 - -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -28,8 +28,9 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, undef) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) @@ -66,8 +67,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, undef) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) @@ -104,8 +110,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 0) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) @@ -144,8 +155,16 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 1) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) @@ -184,8 +203,16 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(1, 0) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) @@ -224,8 +251,15 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 0) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) @@ -263,8 +297,15 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(1, 1) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) @@ -303,8 +344,15 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, 2) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) @@ -341,8 +389,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, undef) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) @@ -379,8 +432,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 2) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) @@ -419,8 +477,16 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(2, 3) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) @@ -459,8 +525,16 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, 2) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) @@ -496,8 +570,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(undef, 3) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) @@ -533,8 +612,13 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, undef) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) @@ -572,8 +656,15 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(3, 3) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) @@ -659,8 +750,15 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 0) - ; GFX9-NEXT: $vgpr0 = COPY [[SHUF]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shuffle-vector.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-shuffle-vector.mir +++ /dev/null @@ -1,77 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s - ---- -name: shufflevector_v2s16_ss -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: shufflevector_v2s16_ss - ; CHECK: liveins: $sgpr0, $sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 1) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $sgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) -... - ---- -name: shufflevector_v2s16_sv -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; CHECK-LABEL: name: shufflevector_v2s16_sv - ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY2]](<2 x s16>), [[COPY1]], shufflemask(0, 1) - %0:_(<2 x s16>) = COPY $sgpr0 - %1:_(<2 x s16>) = COPY $vgpr0 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) -... - ---- -name: shufflevector_v2s16_vs -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; CHECK-LABEL: name: shufflevector_v2s16_vs - ; CHECK: liveins: $sgpr0, $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY2]], shufflemask(0, 1) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $sgpr0 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) -... - ---- -name: shufflevector_v2s16_vv -legalized: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - ; CHECK-LABEL: name: shufflevector_v2s16_vv - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s16>), [[COPY1]], shufflemask(0, 1) - %0:_(<2 x s16>) = COPY $vgpr0 - %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) -...