diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12937,8 +12937,8 @@
                                                (iPTR 0)))),
           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
 
-def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
-          (VMOVW2SHrr GR32:$src)>;
+def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
+          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
 
 // AVX 128-bit movw instruction write zeros in the high 128-bit part.
 def : Pat<(v8i16 (X86vzload16 addr:$src)),
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1956,6 +1956,76 @@
   ret <8 x half> %3
 }
 
+define <16 x i16> @test22(i16* %mem) nounwind {
+; X64-LABEL: test22:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl 0, %eax
+; X64-NEXT:    andw (%rdi), %ax
+; X64-NEXT:    vmovw %eax, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: test22:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl 0, %ecx
+; X86-NEXT:    andw (%eax), %cx
+; X86-NEXT:    vmovw %ecx, %xmm0
+; X86-NEXT:    retl
+  %1 = load i16, i16* null, align 2
+  %2 = load i16, i16* %mem, align 2
+  %3 = and i16 %1, %2
+  %4 = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %3, i32 0
+  ret <16 x i16> %4
+}
+
+define void @pr52560(i8 %0, <2 x i16> %1, i8* %c) nounwind {
+; X64-LABEL: pr52560:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    vmovw %eax, %xmm1
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpcmpgtw %xmm2, %xmm1, %k1
+; X64-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT:    vmovw %xmm0, %eax
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    je .LBB121_2
+; X64-NEXT:  # %bb.1: # %for.body.preheader
+; X64-NEXT:    movb $0, (%rsi)
+; X64-NEXT:  .LBB121_2: # %for.end
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr52560:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovw %eax, %xmm1
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vpcmpgtw %xmm2, %xmm1, %k1
+; X86-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT:    vmovw %xmm0, %eax
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    je .LBB121_2
+; X86-NEXT:  # %bb.1: # %for.body.preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb $0, (%eax)
+; X86-NEXT:  .LBB121_2: # %for.end
+; X86-NEXT:    retl
+entry:
+  %conv = sext i8 %0 to i16
+  %2 = insertelement <2 x i16> <i16 undef, i16 0>, i16 %conv, i32 0
+  %3 = icmp sgt <2 x i16> %2, zeroinitializer
+  %4 = select <2 x i1> %3, <2 x i16> %1, <2 x i16> <i16 0, i16 undef>
+  %5 = extractelement <2 x i16> %4, i32 0
+  %tobool.not14 = icmp eq i16 %5, 0
+  br i1 %tobool.not14, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  store i8 0, i8* %c, align 1
+  br label %for.end
+
+for.end:                                          ; preds = %for.body.preheader, %entry
+  ret void
+}
+
 define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
 ; X64-LABEL: pr52561:
 ; X64:       # %bb.0: