Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -4002,10 +4002,18 @@
 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
 
+def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))), (f32 FR32X:$src1), (f32 FR32X:$src2))),
+          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
 
+def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))), (f64 FR64X:$src1), (f64 FR64X:$src2))),
+          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
Index: test/CodeGen/X86/avx512-load-store.ll
===================================================================
--- test/CodeGen/X86/avx512-load-store.ll
+++ test/CodeGen/X86/avx512-load-store.ll
@@ -12,7 +12,6 @@
 ; CHECK32-LABEL: test_mm_mask_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovss %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -37,7 +36,6 @@
 ; CHECK32-LABEL: test_mm_maskz_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm2 {%k1}
@@ -62,7 +60,6 @@
 ; CHECK32-LABEL: test_mm_mask_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
@@ -87,7 +84,6 @@
 ; CHECK32-LABEL: test_mm_maskz_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
Index: test/CodeGen/X86/avx512-select.ll
===================================================================
--- test/CodeGen/X86/avx512-select.ll
+++ test/CodeGen/X86/avx512-select.ll
@@ -289,7 +289,6 @@
 ;
 ; X64-LABEL: pr30561_f64:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
@@ -309,7 +308,6 @@
 ;
 ; X64-LABEL: pr30561_f32:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
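
Note (not part of the patch): a minimal C sketch of the kind of source that reaches the new patterns. The masked scalar-move intrinsics test bit 0 of the i8 mask with an and of 1; that and is the X86selects condition the new .td patterns match, so it no longer survives as a separate andb $1 before the kmovw (the VK1WM-masked VMOVSS/VMOVSD only reads bit 0 anyway). The driver below is hypothetical, assumes an AVX-512F target, and would be compiled with -mavx512f.

// Hypothetical example, not from the LLVM tree; compile with -mavx512f.
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128 w = _mm_set1_ps(1.0f);   // fallback value for element 0
  __m128 a = _mm_set1_ps(2.0f);   // source of the upper three elements
  __m128 b = _mm_set1_ps(3.0f);   // value taken when the mask bit is set
  __mmask8 k = 1;

  // dst[0] = (k & 1) ? b[0] : w[0]; dst[1..3] = a[1..3]
  // The bit test on k shows up in IR as an 'and' with 1 (the removed
  // andb $1 in the checks above); the new patterns fold it into {%k1}.
  __m128 r = _mm_mask_move_ss(w, k, a, b);

  float out[4];
  _mm_storeu_ps(out, r);
  printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);  // 3 2 2 2
  return 0;
}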