diff --git a/llvm/test/CodeGen/X86/unpckpd-to-faster-alternative.ll b/llvm/test/CodeGen/X86/unpckpd-to-faster-alternative.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/unpckpd-to-faster-alternative.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,CHECK-512,CHECK-SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake | FileCheck %s --check-prefixes=CHECK,CHECK-SKL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-512,CHECK-ICX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-512,CHECK-V4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-512,CHECK-ZNVER4
+
+; Every shuffle below moves pairs of adjacent floats, i.e. whole 64-bit
+; elements, in the interleave pattern of vunpcklpd/vunpckhpd, at 512-, 256-
+; and 128-bit vector widths.
+;
+; NOTE(review): the "; -LABEL:", "; :" and "; -NEXT:" lines carry no FileCheck
+; prefix, so no RUN line above checks them; presumably stale vshufps
+; expectations - restore the intended prefix or regenerate. TODO confirm.
+
+define <16 x float> @transform_VUNPCKLPDZrr(<16 x float> %a, <16 x float> %b) nounwind {
+; CHECK-512-LABEL: transform_VUNPCKLPDZrr:
+; CHECK-512:       # %bb.0:
+; CHECK-512-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-512-NEXT:    retq
+;
+; CHECK-SKL-LABEL: transform_VUNPCKLPDZrr:
+; CHECK-SKL:       # %bb.0:
+; CHECK-SKL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
+; CHECK-SKL-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[2],ymm3[2]
+; CHECK-SKL-NEXT:    retq
+  %shufp = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 20, i32 21, i32 8, i32 9, i32 24, i32 25, i32 12, i32 13, i32 28, i32 29>
+  ret <16 x float> %shufp
+}
+
+define <16 x float> @transform_VUNPCKHPDZrr(<16 x float> %a, <16 x float> %b) nounwind {
+; CHECK-512-LABEL: transform_VUNPCKHPDZrr:
+; CHECK-512:       # %bb.0:
+; CHECK-512-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; CHECK-512-NEXT:    retq
+;
+; CHECK-SKL-LABEL: transform_VUNPCKHPDZrr:
+; CHECK-SKL:       # %bb.0:
+; CHECK-SKL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; CHECK-SKL-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm3[1],ymm1[3],ymm3[3]
+; CHECK-SKL-NEXT:    retq
+  %shufp = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 2, i32 3, i32 18, i32 19, i32 6, i32 7, i32 22, i32 23, i32 10, i32 11, i32 26, i32 27, i32 14, i32 15, i32 30, i32 31>
+  ret <16 x float> %shufp
+}
+
+define <8 x float> @transform_VUNPCKLPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
+; CHECK-LABEL: transform_VUNPCKLPDYrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-NEXT:    retq
+;
+; -LABEL: transform_VUNPCKLPDYrr:
+; : # %bb.0:
+; -NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1],ymm0[4,5],ymm1[4,5]
+; -NEXT: retq
+  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
+  ret <8 x float> %shufp
+}
+
+define <8 x float> @transform_VUNPCKHPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
+; CHECK-LABEL: transform_VUNPCKHPDYrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-NEXT:    retq
+;
+; -LABEL: transform_VUNPCKHPDYrr:
+; : # %bb.0:
+; -NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3],ymm0[6,7],ymm1[6,7]
+; -NEXT: retq
+  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
+  ret <8 x float> %shufp
+}
+
+define <4 x float> @transform_VUNPCKLPDrr(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: transform_VUNPCKLPDrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; -LABEL: transform_VUNPCKLPDrr:
+; : # %bb.0:
+; -NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; -NEXT: retq
+  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x float> %shufp
+}
+
+define <4 x float> @transform_VUNPCKHPDrr(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: transform_VUNPCKHPDrr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT:    retq
+;
+; -LABEL: transform_VUNPCKHPDrr:
+; : # %bb.0:
+; -NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,3]
+; -NEXT: retq
+  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  ret <4 x float> %shufp
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ICX: {{.*}}
+; CHECK-SKX: {{.*}}
+; CHECK-V4: {{.*}}
+; CHECK-ZNVER4: {{.*}}