Skip to content

Commit b1b3368

Browse files
committed Apr 24, 2019
[x86] make sure horizontal op and broadcast types match to simplify (PR41414)
If the types don't match, we can't just remove the shuffle. There may be some other opportunity for optimization here, but this should prevent the crash seen in https://bugs.llvm.org/show_bug.cgi?id=41414. (llvm-svn: 359095)
1 parent 513d365 commit b1b3368

File tree

2 files changed

+71
-3
lines changed

2 files changed

+71
-3
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+6-2
Original file line number | Diff line number | Diff line change
@@ -32820,10 +32820,13 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
3282032820

3282132821
// For a broadcast, peek through an extract element of index 0 to find the
3282232822
// horizontal op: broadcast (ext_vec_elt HOp, 0)
32823+
EVT VT = N->getValueType(0);
3282332824
if (Opcode == X86ISD::VBROADCAST) {
3282432825
SDValue SrcOp = N->getOperand(0);
3282532826
if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
32826-
SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
32827+
SrcOp.getValueType() == MVT::f64 &&
32828+
SrcOp.getOperand(0).getValueType() == VT &&
32829+
isNullConstant(SrcOp.getOperand(1)))
3282732830
N = SrcOp.getNode();
3282832831
}
3282932832

@@ -32847,7 +32850,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
3284732850
// movddup (hadd X, X) --> hadd X, X
3284832851
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
3284932852
assert((HOp.getValueType() == MVT::v2f64 ||
32850-
HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
32853+
HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
32854+
"Unexpected type for h-op");
3285132855
return HOp;
3285232856
}
3285332857

‎llvm/test/CodeGen/X86/haddsub-3.ll

+65-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
33
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
4-
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
56

67
define float @pr26491(<4 x float> %a0) {
78
; SSE2-LABEL: pr26491:
@@ -37,3 +38,66 @@ define float @pr26491(<4 x float> %a0) {
3738
%5 = fadd float %3, %4
3839
ret float %5
3940
}
41+
42+
; When simplifying away a splat (broadcast), the hop type must match the shuffle type.
43+
44+
define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
45+
; SSE2-LABEL: PR41414:
46+
; SSE2: # %bb.0:
47+
; SSE2-NEXT: movq %rdi, %xmm2
48+
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
49+
; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2
50+
; SSE2-NEXT: movapd %xmm2, %xmm3
51+
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
52+
; SSE2-NEXT: addpd %xmm2, %xmm3
53+
; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
54+
; SSE2-NEXT: divpd %xmm3, %xmm1
55+
; SSE2-NEXT: divpd %xmm3, %xmm0
56+
; SSE2-NEXT: xorpd %xmm2, %xmm2
57+
; SSE2-NEXT: addpd %xmm2, %xmm0
58+
; SSE2-NEXT: addpd %xmm2, %xmm1
59+
; SSE2-NEXT: retq
60+
;
61+
; SSSE3-LABEL: PR41414:
62+
; SSSE3: # %bb.0:
63+
; SSSE3-NEXT: movq %rdi, %xmm2
64+
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1]
65+
; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2
66+
; SSSE3-NEXT: haddpd %xmm2, %xmm2
67+
; SSSE3-NEXT: divpd %xmm2, %xmm1
68+
; SSSE3-NEXT: divpd %xmm2, %xmm0
69+
; SSSE3-NEXT: xorpd %xmm2, %xmm2
70+
; SSSE3-NEXT: addpd %xmm2, %xmm0
71+
; SSSE3-NEXT: addpd %xmm2, %xmm1
72+
; SSSE3-NEXT: retq
73+
;
74+
; AVX1-LABEL: PR41414:
75+
; AVX1: # %bb.0:
76+
; AVX1-NEXT: vmovq %rdi, %xmm1
77+
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
78+
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
79+
; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
80+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
81+
; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0
82+
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
83+
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
84+
; AVX1-NEXT: retq
85+
;
86+
; AVX2-LABEL: PR41414:
87+
; AVX2: # %bb.0:
88+
; AVX2-NEXT: vmovq %rdi, %xmm1
89+
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
90+
; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
91+
; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
92+
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
93+
; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0
94+
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
95+
; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
96+
; AVX2-NEXT: retq
97+
%conv = uitofp i64 %x to double
98+
%t0 = insertelement <4 x double> undef, double %conv, i32 0
99+
%t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer
100+
%t2 = fdiv <4 x double> %y, %t1
101+
%t3 = fadd <4 x double> zeroinitializer, %t2
102+
ret <4 x double> %t3
103+
}

0 commit comments

Comments
 (0)