Index: llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -111,10 +111,6 @@
 }
 
 define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
-; CHECK-LABEL: hadd_16_3:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT:    retq
 ; KNL-LABEL: hadd_16_3:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -138,10 +134,6 @@
 }
 
 define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
-; CHECK-LABEL: fhadd_16_3:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
-; CHECK-NEXT:    retq
 ; KNL-LABEL: fhadd_16_3:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
@@ -164,14 +156,6 @@
 }
 
 define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
-; CHECK-LABEL: fhadd_16_4:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; CHECK-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
-; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
-; CHECK-NEXT:    retq
 ; KNL-LABEL: fhadd_16_4:
 ; KNL:       # BB#0:
 ; KNL-NEXT:    vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
@@ -190,3 +174,132 @@
   %x229 = fadd <8 x double> %x226, %x228
   ret <8 x double> %x229
 }
+
+; Fully-defined even/odd interleave of both inputs, fadd, low 256 bits returned: expects zmm unpck + vaddpd.
+define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
+; KNL-LABEL: fadd_noundef_low:
+; KNL:       # BB#0:
+; KNL-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; KNL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; KNL-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; KNL-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: fadd_noundef_low:
+; SKX:       # BB#0:
+; SKX-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; SKX-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; SKX-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; SKX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; SKX-NEXT:    retq
+  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %x229 = fadd <8 x double> %x226, %x228
+  %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %x230
+}
+
+; Same interleave + fadd, but the high 256 bits are returned: expects an extra vextractf64x4 $1.
+define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
+; KNL-LABEL: fadd_noundef_high:
+; KNL:       # BB#0:
+; KNL-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; KNL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; KNL-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; KNL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: fadd_noundef_high:
+; SKX:       # BB#0:
+; SKX-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; SKX-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; SKX-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; SKX-NEXT:    retq
+  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %x229 = fadd <8 x double> %x226, %x228
+  %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x double> %x230
+}
+
+
+; Integer variant: fully-defined even/odd i32 shuffles, add, low 256 bits returned: expects zmm vshufps + vpaddd.
+define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
+; KNL-LABEL: hadd_16_3_sv:
+; KNL:       # BB#0:
+; KNL-NEXT:    vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
+; KNL-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
+; KNL-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; KNL-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: hadd_16_3_sv:
+; SKX:       # BB#0:
+; SKX-NEXT:    vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
+; SKX-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
+; SKX-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; SKX-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; SKX-NEXT:    retq
+  %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
+  %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15, i32 29, i32 31>
+  %x229 = add <16 x i32> %x226, %x228
+  %x230 = shufflevector <16 x i32> %x229, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %x230
+}
+
+
+; Interleave + fadd with only element 0 extracted: expects zmm unpck + vaddpd and no extra extract.
+define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
+; KNL-LABEL: fadd_noundef_eel:
+; KNL:       # BB#0:
+; KNL-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; KNL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; KNL-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: fadd_noundef_eel:
+; SKX:       # BB#0:
+; SKX-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; SKX-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; SKX-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
+; SKX-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %x229 = fadd <8 x double> %x226, %x228
+  %x230 = extractelement <8 x double> %x229, i32 0
+  ret double %x230
+}
+
+
+
+; Interleave + fsub with element 5 extracted: expects zmm unpck + vsubpd, then vextractf32x4 $2 and vpermilpd.
+define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
+; KNL-LABEL: fsub_noundef_ee:
+; KNL:       # BB#0:
+; KNL-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; KNL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; KNL-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
+; KNL-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
+; KNL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: fsub_noundef_ee:
+; SKX:       # BB#0:
+; SKX-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; SKX-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; SKX-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
+; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
+; SKX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; SKX-NEXT:    vzeroupper
+; SKX-NEXT:    retq
+  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %x229 = fsub <8 x double> %x226, %x228
+  %x230 = extractelement <8 x double> %x229, i32 5
+  ret double %x230
+}
+