; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

define float @pr26491(<4 x float> %a0) {
; SSE2-LABEL: pr26491:
@@ -37,3 +38,66 @@ define float @pr26491(<4 x float> %a0) {
  %5 = fadd float %3, %4
  ret float %5
}
+
+; When simplifying away a splat (broadcast), the hop type must match the shuffle type.
+
+define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
+; SSE2-LABEL: PR41414:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq %rdi, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
+; SSE2-NEXT: addpd %xmm2, %xmm3
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
+; SSE2-NEXT: divpd %xmm3, %xmm1
+; SSE2-NEXT: divpd %xmm3, %xmm0
+; SSE2-NEXT: xorpd %xmm2, %xmm2
+; SSE2-NEXT: addpd %xmm2, %xmm0
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR41414:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movq %rdi, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
+; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: haddpd %xmm2, %xmm2
+; SSSE3-NEXT: divpd %xmm2, %xmm1
+; SSSE3-NEXT: divpd %xmm2, %xmm0
+; SSSE3-NEXT: xorpd %xmm2, %xmm2
+; SSSE3-NEXT: addpd %xmm2, %xmm0
+; SSSE3-NEXT: addpd %xmm2, %xmm1
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: PR41414:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovq %rdi, %xmm1
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR41414:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq %rdi, %xmm1
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
+; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+  %conv = uitofp i64 %x to double
+  %t0 = insertelement <4 x double> undef, double %conv, i32 0
+  %t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
+  %t2 = fdiv <4 x double> %y, %t1
+  %t3 = fadd <4 x double> zeroinitializer, %t2
+  ret <4 x double> %t3
+}
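
The comment added in the test above is terse, so here is a minimal IR sketch of the shape it guards against: a pairwise lane add that the x86 backend can turn into a 128-bit horizontal op, whose scalar result is then splatted across a 256-bit vector. The function name @splat_of_hadd and the reduced pattern are illustrative assumptions, not part of the committed test; the point is that the splat's shuffle type (<4 x double>) is wider than any hop formed for the sum, so lowering has to broadcast the narrow hop result (the vbroadcastsd / vinsertf128 in the checks above) rather than reuse the hop node at the shuffle's type.

; Illustrative sketch only; not part of the test file above.
define <4 x double> @splat_of_hadd(<2 x double> %v) {
  ; Pairwise add of the two lanes; on SSE3+/AVX targets this can be matched to haddpd.
  %lo = extractelement <2 x double> %v, i32 0
  %hi = extractelement <2 x double> %v, i32 1
  %sum = fadd double %lo, %hi
  ; Splat the scalar sum across all four elements of a 256-bit vector. The shuffle
  ; type here is <4 x double>, wider than the 128-bit hop that produces %sum, so the
  ; backend must broadcast the hop result instead of retyping the hop itself.
  %ins = insertelement <4 x double> undef, double %sum, i32 0
  %splat = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %splat
}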