diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2850,7 +2850,6 @@ return false; assert(ShufMask.size() == 2); - assert(STI.hasSDWA() && "no target has VOP3P but not SDWA"); MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); @@ -2907,17 +2906,28 @@ } } else if (Mask[0] == 0 && Mask[1] == 0) { if (IsVALU) { - // Write low half of the register into the high half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + if (STI.hasSDWA()) { + // Write low half of the register into the high half. + MachineInstr *MovSDWA = + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) + .addImm(0) // $src0_modifiers + .addReg(SrcVec) // $src0 + .addImm(0) // $clamp + .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel + .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused + .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel + .addReg(SrcVec, RegState::Implicit); + MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg) + .addImm(0xFFFF) + .addReg(SrcVec); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) + .addReg(TmpReg) + .addImm(16) + .addReg(TmpReg); + } } else { BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) .addReg(SrcVec) @@ -2925,17 +2935,28 @@ } } else if (Mask[0] == 1 && Mask[1] == 1) { if (IsVALU) { - // Write high half of the register into the low half. - MachineInstr *MovSDWA = - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) - .addImm(0) // $src0_modifiers - .addReg(SrcVec) // $src0 - .addImm(0) // $clamp - .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel - .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused - .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel - .addReg(SrcVec, RegState::Implicit); - MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + if (STI.hasSDWA()) { + // Write high half of the register into the low half. + MachineInstr *MovSDWA = + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) + .addImm(0) // $src0_modifiers + .addReg(SrcVec) // $src0 + .addImm(0) // $clamp + .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel + .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused + .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel + .addReg(SrcVec, RegState::Implicit); + MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) + .addImm(16) + .addReg(SrcVec); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg) + .addReg(TmpReg) + .addImm(16) + .addReg(TmpReg); + } } else { BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg) .addReg(SrcVec) @@ -2948,13 +2969,19 @@ .addReg(SrcVec) .addImm(16); } else { - Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg) - .addReg(SrcVec) - .addImm(16); - BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) - .addReg(TmpReg) - .addReg(SrcVec); + if (STI.hasSPackHL()) { + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HL_B32_B16), DstReg) + .addReg(SrcVec) + .addReg(SrcVec); + } else { + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg) + .addReg(SrcVec) + .addImm(16); + BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg) + .addReg(TmpReg) + .addReg(SrcVec); + } } } else llvm_unreachable("all shuffle masks should be handled"); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX11 %s --- name: v_shufflevector_v2s16_v2s16_u_u @@ -16,6 +17,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX9: $vgpr0 = COPY [[DEF]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_u + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX11: $vgpr0 = COPY [[DEF]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) @@ -37,6 +42,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_u + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) @@ -59,6 +68,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_0 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) @@ -81,6 +95,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec ; GFX9: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_u + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) @@ -102,6 +121,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_1 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) @@ -124,6 +147,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_u + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) @@ -146,6 +173,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_2 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) @@ -168,6 +200,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec ; GFX9: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_u + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) @@ -189,6 +226,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_u_3 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) @@ -211,6 +252,12 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_0 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec + ; GFX11: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) @@ -232,6 +279,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_0_1 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) @@ -254,6 +305,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec ; GFX9: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_0 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec + ; GFX11: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) @@ -276,6 +332,12 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_1_1 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) @@ -298,6 +360,12 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0) ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_2 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[COPY]], implicit $exec + ; GFX11: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_AND_B32_e32_]], 16, [[V_AND_B32_e32_]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) @@ -319,6 +387,10 @@ ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: $vgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_2_3 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: $vgpr0 = COPY [[COPY]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) @@ -341,6 +413,11 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec ; GFX9: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_2 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 [[COPY]], [[COPY]], 16, implicit $exec + ; GFX11: $vgpr0 = COPY [[V_ALIGNBIT_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) @@ -363,6 +440,12 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 4, 2, 5, implicit $exec, implicit [[COPY]](tied-def 0) ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]] + ; GFX11-LABEL: name: v_shufflevector_v2s16_v2s16_3_3 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec + ; GFX11: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[V_LSHRREV_B32_e64_]], 16, [[V_LSHRREV_B32_e64_]], implicit $exec + ; GFX11: $vgpr0 = COPY [[V_LSHL_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3) @@ -384,6 +467,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GFX9: $sgpr0 = COPY [[DEF]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_u + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX11: $sgpr0 = COPY [[DEF]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, undef) @@ -405,6 +492,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_u + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, undef) @@ -427,6 +518,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_0 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc + ; GFX11: $sgpr0 = COPY [[S_LSHL_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0) @@ -449,6 +545,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc ; GFX9: $sgpr0 = COPY [[S_LSHR_B32_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_u + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX11: $sgpr0 = COPY [[S_LSHR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, undef) @@ -470,6 +571,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_1 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 1) @@ -492,6 +597,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_u + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, undef) @@ -514,6 +623,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_2 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc + ; GFX11: $sgpr0 = COPY [[S_LSHL_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2) @@ -536,6 +650,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc ; GFX9: $sgpr0 = COPY [[S_LSHR_B32_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_u + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX11: $sgpr0 = COPY [[S_LSHR_B32_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, undef) @@ -557,6 +676,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_u_3 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 3) @@ -579,6 +702,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_0 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0) @@ -600,6 +728,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_0_1 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 1) @@ -623,6 +755,11 @@ ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_0 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 0) @@ -645,6 +782,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_1_1 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX11: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1, 1) @@ -667,6 +809,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_2 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 2) @@ -688,6 +835,10 @@ ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: $sgpr0 = COPY [[COPY]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_2_3 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: $sgpr0 = COPY [[COPY]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2, 3) @@ -711,6 +862,11 @@ ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_2 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: [[S_PACK_HL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HL_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_HL_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 2) @@ -733,6 +889,11 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] ; GFX9: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] + ; GFX11-LABEL: name: s_shufflevector_v2s16_v2s16_3_3 + ; GFX11: liveins: $sgpr0, $sgpr1 + ; GFX11: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX11: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY]] + ; GFX11: $sgpr0 = COPY [[S_PACK_HH_B32_B16_]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(3, 3)