diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2191,7 +2191,17 @@
         .addReg(SrcVec)
         .addImm(16);
     }
-  } else if (isZeroOrUndef(Mask[0]) && Mask[1] == 0) {
+  } else if (Mask[0] == -1 && Mask[1] == 0) {
+    if (IsVALU) {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg)
+        .addImm(16)
+        .addReg(SrcVec);
+    } else {
+      BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg)
+        .addReg(SrcVec)
+        .addImm(16);
+    }
+  } else if (Mask[0] == 0 && Mask[1] == 0) {
     if (IsVALU) {
       // Write low half of the register into the high half.
       MachineInstr *MovSDWA =
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir
@@ -56,8 +56,8 @@
     ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
-    ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
+    ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+    ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
     %0:vgpr(<2 x s16>) = COPY $vgpr0
     %1:vgpr(<2 x s16>) = COPY $vgpr1
     %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -143,8 +143,8 @@
     ; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
-    ; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
+    ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+    ; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
     %0:vgpr(<2 x s16>) = COPY $vgpr0
     %1:vgpr(<2 x s16>) = COPY $vgpr1
     %2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
@@ -424,8 +424,8 @@
     ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0
     ; GFX9: liveins: $sgpr0, $sgpr1
     ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
-    ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+    ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
     %0:sgpr(<2 x s16>) = COPY $sgpr0
     %1:sgpr(<2 x s16>) = COPY $sgpr1
     %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -511,8 +511,8 @@
    ; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2
     ; GFX9: liveins: $sgpr0, $sgpr1
     ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
-    ; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
+    ; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
     %0:sgpr(<2 x s16>) = COPY $sgpr0
     %1:sgpr(<2 x s16>) = COPY $sgpr1
     %2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)