diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36249,9 +36249,10 @@
         (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
       Shuffle = X86ISD::VZEXT_MOVL;
-      SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
-                      : !Subtarget.hasSSE2() ? MVT::v4f32
-                                             : MaskVT;
+      if (MaskEltSize == 16)
+        SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+      else
+        SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
       return true;
     }
   }
@@ -36300,9 +36301,10 @@
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
-    SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
-                    : !Subtarget.hasSSE2() ? MVT::v4f32
-                                           : MaskVT;
+    if (MaskEltSize == 16)
+      SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+    else
+      SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
     return true;
   }
 
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2025,3 +2025,39 @@
 for.end:                                          ; preds = %for.body.preheader, %entry
   ret void
 }
+
+define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
+; X64-LABEL: pr52561:
+; X64:       # %bb.0:
+; X64-NEXT:    vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
+; X64-NEXT:    vpaddd %ymm4, %ymm2, %ymm2
+; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT:    vpaddd %ymm4, %ymm3, %ymm2
+; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr52561:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-32, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    vpaddd 8(%ebp), %ymm1, %ymm1
+; X86-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
+; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+  %1 = add <16 x i32> %a,
+  %2 = add <16 x i32> %1, %b
+  %3 = and <16 x i32> %2,
+  ret <16 x i32> %3
+}