Skip to content

Commit b504263

Browse files
committed Jul 3, 2015
[X86][SSE] Sign extension for target vector sizes less than 128 bits (pt2)
Add support for signed integer to floating-point conversion from v2i8/v2i16 to v2f64 by sign-extending the source to v2i32 before the conversion to v2f64. Differential Revision: http://reviews.llvm.org/D10589. llvm-svn: 241325
1 parent 5dbf346 commit b504263

File tree

3 files changed

+19
-49
lines changed

3 files changed

+19
-49
lines changed
 

Diff for: ‎llvm/lib/Target/X86/X86ISelLowering.cpp

+9-7
Original file line number | Diff line number | Diff line change
@@ -24668,16 +24668,18 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
2466824668

2466924669
// Now move on to more general possibilities.
2467024670
SDValue Op0 = N->getOperand(0);
24671-
EVT InVT = Op0->getValueType(0);
24671+
EVT VT = N->getValueType(0);
24672+
EVT InVT = Op0.getValueType();
24673+
EVT InSVT = InVT.getScalarType();
2467224674

2467324675
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
2467424676
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
24675-
if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
24676-
InVT == MVT::v8i16 || InVT == MVT::v4i16) {
24677+
if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
2467724678
SDLoc dl(N);
24678-
MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
24679+
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
24680+
InVT.getVectorNumElements());
2467924681
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
24680-
return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
24682+
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
2468124683
}
2468224684

2468324685
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
@@ -24687,10 +24689,10 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
2468724689
EVT LdVT = Ld->getValueType(0);
2468824690

2468924691
// This transformation is not supported if the result type is f16
24690-
if (N->getValueType(0) == MVT::f16)
24692+
if (VT == MVT::f16)
2469124693
return SDValue();
2469224694

24693-
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
24695+
if (!Ld->isVolatile() && !VT.isVector() &&
2469424696
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
2469524697
!Subtarget->is64Bit() && LdVT == MVT::i64) {
2469624698
SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(

Diff for: ‎llvm/test/CodeGen/X86/vec_int_to_fp.ll

+9-41
Original file line number | Diff line number | Diff line change
@@ -50,31 +50,15 @@ define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
5050
define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) {
5151
; SSE2-LABEL: sitofp_2vf64_i16:
5252
; SSE2: # BB#0:
53-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
54-
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
55-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
56-
; SSE2-NEXT: movd %xmm1, %rax
57-
; SSE2-NEXT: movswq %ax, %rax
58-
; SSE2-NEXT: movd %xmm0, %rcx
59-
; SSE2-NEXT: movswq %cx, %rcx
60-
; SSE2-NEXT: xorps %xmm0, %xmm0
61-
; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
62-
; SSE2-NEXT: xorps %xmm1, %xmm1
63-
; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
64-
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
53+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
54+
; SSE2-NEXT: psrad $16, %xmm0
55+
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
6556
; SSE2-NEXT: retq
6657
;
6758
; AVX-LABEL: sitofp_2vf64_i16:
6859
; AVX: # BB#0:
69-
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
70-
; AVX-NEXT: vmovq %xmm0, %rax
71-
; AVX-NEXT: movswq %ax, %rax
72-
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
73-
; AVX-NEXT: movswq %cx, %rcx
74-
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
75-
; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
76-
; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
77-
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
60+
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
61+
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
7862
; AVX-NEXT: retq
7963
%shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
8064
%cvt = sitofp <2 x i16> %shuf to <2 x double>
@@ -86,30 +70,14 @@ define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) {
8670
; SSE2: # BB#0:
8771
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
8872
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
89-
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
90-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
91-
; SSE2-NEXT: movd %xmm1, %rax
92-
; SSE2-NEXT: movsbq %al, %rax
93-
; SSE2-NEXT: movd %xmm0, %rcx
94-
; SSE2-NEXT: movsbq %cl, %rcx
95-
; SSE2-NEXT: xorps %xmm0, %xmm0
96-
; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
97-
; SSE2-NEXT: xorps %xmm1, %xmm1
98-
; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
99-
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
73+
; SSE2-NEXT: psrad $24, %xmm0
74+
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
10075
; SSE2-NEXT: retq
10176
;
10277
; AVX-LABEL: sitofp_2vf64_i8:
10378
; AVX: # BB#0:
104-
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
105-
; AVX-NEXT: vmovq %xmm0, %rax
106-
; AVX-NEXT: movsbq %al, %rax
107-
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
108-
; AVX-NEXT: movsbq %cl, %rcx
109-
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
110-
; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
111-
; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
112-
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
79+
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
80+
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
11381
; AVX-NEXT: retq
11482
%shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
11583
%cvt = sitofp <2 x i8> %shuf to <2 x double>

Diff for: ‎llvm/test/CodeGen/X86/widen_conv-3.ll

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
2-
; CHECK: cvtsi2ss
2+
; CHECK: cvtdq2ps
33

44
; sign to float v2i16 to v2f32
55

0 commit comments

Comments
 (0)