Skip to content

Commit 124d328

Browse files
author
Elad Cohen
committedAug 17, 2017
[SelectionDAG] Teach the vector-types operand scalarizer about SETCC
When v1i1 is legal (e.g. AVX512) the legalizer can reach a case where a v1i1 SETCC with an illegal vector type operand wasn't scalarized (since v1i1 is legal) but its operands do have to be scalarized. This used to assert because SETCC was missing from the vector operand scalarizer. This patch attempts to teach the legalizer to handle these cases by scalarizing the operands, converting the node into a scalar SETCC node. Differential revision: https://reviews.llvm.org/D36651 llvm-svn: 311071
1 parent a7e061f commit 124d328

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed
 

Diff for: ‎llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

+1
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
627627
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
628628
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
629629
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
630+
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
630631
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
631632
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
632633

Diff for: ‎llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+33
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
484484
case ISD::VSELECT:
485485
Res = ScalarizeVecOp_VSELECT(N);
486486
break;
487+
case ISD::SETCC:
488+
Res = ScalarizeVecOp_VSETCC(N);
489+
break;
487490
case ISD::STORE:
488491
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
489492
break;
@@ -560,6 +563,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
560563
N->getOperand(2));
561564
}
562565

566+
/// Scalarize the vector operands of a SETCC node whose result type is legal.
/// If the operand is a vector that needs to be scalarized then the
567+
/// result must be v1i1, so just convert to a scalar SETCC and wrap it
568+
/// with a SCALAR_TO_VECTOR, since the result type is legal if we got here.
/// Returns the SCALAR_TO_VECTOR node that produces the original result type.
569+
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
570+
assert(N->getValueType(0).isVector() &&
571+
N->getOperand(0).getValueType().isVector() &&
572+
"Operand types must be vectors");
573+
assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
574+
575+
EVT VT = N->getValueType(0);
// Fetch the already-scalarized forms of the two compared operands.
576+
SDValue LHS = GetScalarizedVector(N->getOperand(0));
577+
SDValue RHS = GetScalarizedVector(N->getOperand(1));
578+
579+
// OpVT is the original (vector) operand type; NVT is the element type of
// the v1i1 result.
EVT OpVT = N->getOperand(0).getValueType();
580+
EVT NVT = VT.getVectorElementType();
581+
SDLoc DL(N);
582+
// Turn it into a scalar SETCC producing an i1, forwarding operand 2 of the
// original node unchanged.
583+
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
584+
N->getOperand(2));
585+
586+
// Vectors may have a different boolean contents to scalars. Promote the
587+
// value appropriately, using the extension that matches the boolean
// contents reported by the target for the vector operand type.
588+
ISD::NodeType ExtendCode =
589+
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
590+
591+
Res = DAG.getNode(ExtendCode, DL, NVT, Res);
592+
593+
// Wrap the scalar back into the original vector result type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
594+
}
595+
563596
/// If the value to store is a vector that needs to be scalarized, it must be
564597
/// <1 x ty>. Just store the element.
565598
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){

Diff for: ‎llvm/test/CodeGen/X86/pr34177.ll

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mattr=+avx512f | FileCheck %s
3+
; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s
4+
5+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
define void @test() local_unnamed_addr {
9+
; CHECK-LABEL: test:
10+
; CHECK: # BB#0:
11+
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,3]
12+
; CHECK-NEXT: vpextrq $1, %xmm0, %rax
13+
; CHECK-NEXT: vmovq %xmm0, %rcx
14+
; CHECK-NEXT: negq %rdx
15+
; CHECK-NEXT: fld1
16+
; CHECK-NEXT: fldz
17+
; CHECK-NEXT: fld %st(0)
18+
; CHECK-NEXT: fcmove %st(2), %st(0)
19+
; CHECK-NEXT: cmpq %rax, %rcx
20+
; CHECK-NEXT: fld %st(1)
21+
; CHECK-NEXT: fcmove %st(3), %st(0)
22+
; CHECK-NEXT: cmpq %rax, %rax
23+
; CHECK-NEXT: fld %st(2)
24+
; CHECK-NEXT: fcmove %st(4), %st(0)
25+
; CHECK-NEXT: movl $1, %eax
26+
; CHECK-NEXT: cmpq %rax, %rax
27+
; CHECK-NEXT: fld %st(3)
28+
; CHECK-NEXT: fcmove %st(5), %st(0)
29+
; CHECK-NEXT: fstp %st(5)
30+
; CHECK-NEXT: fxch %st(2)
31+
; CHECK-NEXT: fadd %st(3)
32+
; CHECK-NEXT: fxch %st(4)
33+
; CHECK-NEXT: fadd %st(3)
34+
; CHECK-NEXT: fxch %st(2)
35+
; CHECK-NEXT: fadd %st(3)
36+
; CHECK-NEXT: fxch %st(1)
37+
; CHECK-NEXT: faddp %st(3)
38+
; CHECK-NEXT: fxch %st(3)
39+
; CHECK-NEXT: fstpt (%rax)
40+
; CHECK-NEXT: fxch %st(1)
41+
; CHECK-NEXT: fstpt (%rax)
42+
; CHECK-NEXT: fxch %st(1)
43+
; CHECK-NEXT: fstpt (%rax)
44+
; CHECK-NEXT: fstpt (%rax)
45+
%1 = icmp eq <4 x i64> <i64 0, i64 1, i64 2, i64 3>, undef
46+
%2 = select <4 x i1> %1, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
47+
%3 = fadd <4 x x86_fp80> undef, %2
48+
%4 = shufflevector <4 x x86_fp80> %3, <4 x x86_fp80> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
49+
store <8 x x86_fp80> %4, <8 x x86_fp80>* undef, align 16
50+
unreachable
51+
}
52+

0 commit comments

Comments
 (0)
Please sign in to comment.