Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1079,6 +1079,12 @@
 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8f32 VR256X:$src), 1)>;
+def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
+          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4f64 VR256X:$src), 1)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
+          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v4i64 VR256X:$src), 1)>;
 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v8i32 VR256X:$src), 1)>;
Index: test/CodeGen/X86/pr31306.ll
===================================================================
--- test/CodeGen/X86/pr31306.ll
+++ test/CodeGen/X86/pr31306.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -O2 -mattr=avx512f | FileCheck %s
+
+@ga = global <4 x i64> zeroinitializer, align 8
+@gb = global <8 x i64> zeroinitializer, align 8
+
+define void @blabla(<4 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: blabla:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,3,4]
+; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vinserti64x4 $1, %ymm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpandq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vmovdqu %ymm0, {{.*}}(%rip)
+; CHECK-NEXT:    vmovdqu64 %zmm1, {{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = add <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+  %1 = add <8 x i64> %b, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  %2 = and <8 x i64> %1, <i64 1, i64 2, i64 3, i64 4, i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %0, <4 x i64>* @ga, align 8
+  store <8 x i64> %2, <8 x i64>* @gb, align 8
+  ret void
+}
+
+
+
+@ga2 = global <4 x double> zeroinitializer, align 8
+@gb2 = global <8 x double> zeroinitializer, align 8
+
+define void @blabla2(<4 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: blabla2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
+; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
+; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vdivpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vmovupd %ymm0, {{.*}}(%rip)
+; CHECK-NEXT:    vmovupd %zmm1, {{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = fadd <4 x double> %a, <double 1.0, double 2.0, double 3.0, double 4.0>
+  %1 = fadd <8 x double> %b, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  %2 = fdiv <8 x double> %1, <double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0, double 4.0>
+  store <4 x double> %0, <4 x double>* @ga2, align 8
+  store <8 x double> %2, <8 x double>* @gb2, align 8
+  ret void
+}