Skip to content

Commit b955bf3

Browse files
committedNov 26, 2018
[LegalizeVectorTypes][X86][ARM][AArch64][PowerPC] Don't use SplitVecOp_TruncateHelper for FP_TO_SINT/UINT.
SplitVecOp_TruncateHelper tries to promote the result type while splitting FP_TO_SINT/UINT. It then concatenates the result and introduces a truncate to the original result type. But it does this without inserting the AssertZExt/AssertSExt that the regular result type promotion would insert. Nor does it turn FP_TO_UINT into FP_TO_SINT the way normal result type promotion for these operations does. This is bad on X86 which doesn't support FP_TO_UINT until AVX512. This patch disables the use of SplitVecOp_TruncateHelper for these operations and just lets normal promotion handle it. I've tweaked a couple things in X86ISelLowering to avoid a few obvious regressions there. I believe all the changes on X86 are improvements. The other targets look neutral. Differential Revision: https://reviews.llvm.org/D54906 llvm-svn: 347593
1 parent 5f312ad commit b955bf3

File tree

10 files changed

+256
-516
lines changed

10 files changed

+256
-516
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+2-7
Original file line numberDiff line numberDiff line change
@@ -1694,20 +1694,15 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
16941694
case ISD::VSELECT:
16951695
Res = SplitVecOp_VSELECT(N, OpNo);
16961696
break;
1697-
case ISD::FP_TO_SINT:
1698-
case ISD::FP_TO_UINT:
1699-
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
1700-
Res = SplitVecOp_TruncateHelper(N);
1701-
else
1702-
Res = SplitVecOp_UnaryOp(N);
1703-
break;
17041697
case ISD::SINT_TO_FP:
17051698
case ISD::UINT_TO_FP:
17061699
if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
17071700
Res = SplitVecOp_TruncateHelper(N);
17081701
else
17091702
Res = SplitVecOp_UnaryOp(N);
17101703
break;
1704+
case ISD::FP_TO_SINT:
1705+
case ISD::FP_TO_UINT:
17111706
case ISD::CTTZ:
17121707
case ISD::CTLZ:
17131708
case ISD::CTPOP:

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+9-5
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
909909
setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom);
910910
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
911911

912+
// By marking FP_TO_SINT v8i16 as Custom, we will trick type legalization into
913+
// promoting v8i8 FP_TO_UINT into FP_TO_SINT. When the v8i16 FP_TO_SINT is
914+
// split again based on the input type, this will cause an AssertSExt i16 to
915+
// be emitted instead of an AssertZExt. This will allow packssdw followed by
916+
// packuswb to be used to truncate to v8i8. This is necessary since packusdw
917+
// isn't available until sse4.1.
918+
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
919+
912920
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
913921
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
914922

@@ -26458,11 +26466,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2645826466
unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);
2645926467
MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth),
2646026468
VT.getVectorNumElements());
26461-
unsigned Opc = N->getOpcode();
26462-
if (PromoteVT == MVT::v2i32 || PromoteVT == MVT::v4i32)
26463-
Opc = ISD::FP_TO_SINT;
26464-
26465-
SDValue Res = DAG.getNode(Opc, dl, PromoteVT, Src);
26469+
SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src);
2646626470

2646726471
// Preserve what we know about the size of the original result. Except
2646826472
// when the result is v2i32 since we can't widen the assert.

‎llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll

+18-18
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,30 @@
22

33

44
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
5-
; CHECK: fptosi_v4f64_to_v4i16
5+
; CHECK-LABEL: fptosi_v4f64_to_v4i16
66
; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
77
; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
8-
; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
9-
; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
10-
; CHECK: xtn v0.4h, v[[MID]].4s
8+
; CHECK-DAG: xtn v[[XTN0:[0-9]+]].2s, v[[LHS]].2d
9+
; CHECK-DAG: xtn v[[XTN1:[0-9]+]].2s, v[[RHS]].2d
10+
; CHECK: uzp1 v0.4h, v[[XTN1]].4h, v[[XTN0]].4h
1111
%tmp1 = load <4 x double>, <4 x double>* %ptr
1212
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
1313
ret <4 x i16> %tmp2
1414
}
1515

1616
define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
17-
; CHECK: fptosi_v4f64_to_v4i8
17+
; CHECK-LABEL: fptosi_v4f64_to_v4i8
1818
; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
1919
; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
2020
; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
2121
; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
22-
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
23-
; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
24-
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
25-
; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
26-
; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA2]].4s
27-
; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA0]].4s
28-
; CHECK: xtn v0.8b, v[[TMP1]].8h
22+
; CHECK-DAG: xtn v[[XTN0:[0-9]+]].2s, v[[CONV0]].2d
23+
; CHECK-DAG: xtn v[[XTN1:[0-9]+]].2s, v[[CONV1]].2d
24+
; CHECK-DAG: xtn v[[XTN2:[0-9]+]].2s, v[[CONV2]].2d
25+
; CHECK-DAG: xtn v[[XTN3:[0-9]+]].2s, v[[CONV3]].2d
26+
; CHECK-DAG: uzp1 v[[UZP0:[0-9]+]].4h, v[[XTN1]].4h, v[[XTN0]].4h
27+
; CHECK-DAG: uzp1 v[[UZP1:[0-9]+]].4h, v[[XTN3]].4h, v[[XTN2]].4h
28+
; CHECK: uzp1 v0.8b, v[[UZP1:[0-9]+]].8b, v[[UZP0:[0-9]+]].8b
2929
%tmp1 = load <8 x double>, <8 x double>* %ptr
3030
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
3131
ret <8 x i8> %tmp2
@@ -54,12 +54,12 @@ define <4 x i16> @trunc_v4i64_to_v4i16(<4 x i64>* %ptr) {
5454
}
5555

5656
define <4 x i16> @fptoui_v4f64_to_v4i16(<4 x double>* %ptr) {
57-
; CHECK: fptoui_v4f64_to_v4i16
58-
; CHECK-DAG: fcvtzu v[[LHS:[0-9]+]].2d, v0.2d
59-
; CHECK-DAG: fcvtzu v[[RHS:[0-9]+]].2d, v1.2d
60-
; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
61-
; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
62-
; CHECK: xtn v0.4h, v[[MID]].4s
57+
; CHECK-LABEL: fptoui_v4f64_to_v4i16
58+
; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
59+
; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
60+
; CHECK-DAG: xtn v[[XTN0:[0-9]+]].2s, v[[LHS]].2d
61+
; CHECK-DAG: xtn v[[XTN1:[0-9]+]].2s, v[[RHS]].2d
62+
; CHECK: uzp1 v0.4h, v[[XTN1]].4h, v[[XTN0]].4h
6363
%tmp1 = load <4 x double>, <4 x double>* %ptr
6464
%tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
6565
ret <4 x i16> %tmp2

‎llvm/test/CodeGen/AArch64/vcvt-oversize.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
define <8 x i8> @float_to_i8(<8 x float>* %in) {
44
; CHECK-LABEL: float_to_i8:
5-
; CHECK: ldp q1, q0, [x0]
6-
; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v1.4s, v1.4s
7-
; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v0.4s, v0.4s
8-
; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
9-
; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
5+
; CHECK: ldp q0, q1, [x0]
6+
; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
7+
; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
8+
; CHECK-DAG: fcvtzs v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
9+
; CHECK-DAG: fcvtzs v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
1010
; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
11-
; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
12-
; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
11+
; CHECK-DAG: xtn v[[TMP2:[0-9]+]].4h, v[[MSB]].4s
12+
; CHECK-DAG: uzp1 v0.8b, v[[TMP]].8b, v[[TMP2]].8b
1313
%l = load <8 x float>, <8 x float>* %in
1414
%scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
1515
%conv = fptoui <8 x float> %scale to <8 x i8>

‎llvm/test/CodeGen/ARM/vcvt.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -293,22 +293,22 @@ define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
293293
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
294294
; CHECK-NEXT: vmov d19, r2, r3
295295
; CHECK-NEXT: vadd.f64 d18, d18, d18
296-
; CHECK-NEXT: vcvt.u32.f64 s0, d18
296+
; CHECK-NEXT: vcvt.s32.f64 s0, d18
297297
; CHECK-NEXT: vmov r0, s0
298298
; CHECK-NEXT: vadd.f64 d20, d16, d16
299299
; CHECK-NEXT: vadd.f64 d19, d19, d19
300300
; CHECK-NEXT: vadd.f64 d16, d17, d17
301-
; CHECK-NEXT: vcvt.u32.f64 s2, d20
302-
; CHECK-NEXT: vcvt.u32.f64 s4, d19
303-
; CHECK-NEXT: vcvt.u32.f64 s6, d16
301+
; CHECK-NEXT: vcvt.s32.f64 s2, d20
302+
; CHECK-NEXT: vcvt.s32.f64 s4, d19
303+
; CHECK-NEXT: vcvt.s32.f64 s6, d16
304304
; CHECK-NEXT: vmov.32 d16[0], r0
305305
; CHECK-NEXT: vmov r0, s2
306306
; CHECK-NEXT: vmov.32 d17[0], r0
307307
; CHECK-NEXT: vmov r0, s4
308308
; CHECK-NEXT: vmov.32 d16[1], r0
309309
; CHECK-NEXT: vmov r0, s6
310310
; CHECK-NEXT: vmov.32 d17[1], r0
311-
; CHECK-NEXT: vmovn.i32 d16, q8
311+
; CHECK-NEXT: vuzp.16 d16, d17
312312
; CHECK-NEXT: vmov r0, r1, d16
313313
; CHECK-NEXT: mov pc, lr
314314

‎llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll

+72-72
Original file line numberDiff line numberDiff line change
@@ -166,19 +166,19 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
166166
; CHECK-P8-NEXT: lxvd2x vs2, r3, r4
167167
; CHECK-P8-NEXT: li r4, 48
168168
; CHECK-P8-NEXT: lxvd2x vs3, r3, r4
169-
; CHECK-P8-NEXT: xscvdpuxws f4, f0
169+
; CHECK-P8-NEXT: xscvdpsxws f4, f0
170170
; CHECK-P8-NEXT: xxswapd vs0, vs0
171-
; CHECK-P8-NEXT: xscvdpuxws f5, f1
171+
; CHECK-P8-NEXT: xscvdpsxws f5, f1
172172
; CHECK-P8-NEXT: xxswapd vs1, vs1
173-
; CHECK-P8-NEXT: xscvdpuxws f6, f2
173+
; CHECK-P8-NEXT: xscvdpsxws f6, f2
174174
; CHECK-P8-NEXT: xxswapd vs2, vs2
175-
; CHECK-P8-NEXT: xscvdpuxws f7, f3
175+
; CHECK-P8-NEXT: xscvdpsxws f7, f3
176176
; CHECK-P8-NEXT: xxswapd vs3, vs3
177-
; CHECK-P8-NEXT: xscvdpuxws f0, f0
178-
; CHECK-P8-NEXT: xscvdpuxws f1, f1
177+
; CHECK-P8-NEXT: xscvdpsxws f0, f0
178+
; CHECK-P8-NEXT: xscvdpsxws f1, f1
179179
; CHECK-P8-NEXT: mfvsrwz r3, f4
180-
; CHECK-P8-NEXT: xscvdpuxws f2, f2
181-
; CHECK-P8-NEXT: xscvdpuxws f3, f3
180+
; CHECK-P8-NEXT: xscvdpsxws f2, f2
181+
; CHECK-P8-NEXT: xscvdpsxws f3, f3
182182
; CHECK-P8-NEXT: mfvsrwz r4, f5
183183
; CHECK-P8-NEXT: mtvsrd f4, r3
184184
; CHECK-P8-NEXT: mfvsrwz r3, f6
@@ -221,14 +221,14 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
221221
; CHECK-P9-NEXT: xxswapd vs5, vs2
222222
; CHECK-P9-NEXT: xxswapd vs6, vs1
223223
; CHECK-P9-NEXT: xxswapd vs7, vs0
224-
; CHECK-P9-NEXT: xscvdpuxws f3, f3
225-
; CHECK-P9-NEXT: xscvdpuxws f2, f2
226-
; CHECK-P9-NEXT: xscvdpuxws f1, f1
227-
; CHECK-P9-NEXT: xscvdpuxws f0, f0
228-
; CHECK-P9-NEXT: xscvdpuxws f4, f4
229-
; CHECK-P9-NEXT: xscvdpuxws f5, f5
230-
; CHECK-P9-NEXT: xscvdpuxws f6, f6
231-
; CHECK-P9-NEXT: xscvdpuxws f7, f7
224+
; CHECK-P9-NEXT: xscvdpsxws f3, f3
225+
; CHECK-P9-NEXT: xscvdpsxws f2, f2
226+
; CHECK-P9-NEXT: xscvdpsxws f1, f1
227+
; CHECK-P9-NEXT: xscvdpsxws f0, f0
228+
; CHECK-P9-NEXT: xscvdpsxws f4, f4
229+
; CHECK-P9-NEXT: xscvdpsxws f5, f5
230+
; CHECK-P9-NEXT: xscvdpsxws f6, f6
231+
; CHECK-P9-NEXT: xscvdpsxws f7, f7
232232
; CHECK-P9-NEXT: mfvsrwz r3, f3
233233
; CHECK-P9-NEXT: mfvsrwz r5, f2
234234
; CHECK-P9-NEXT: mfvsrwz r7, f1
@@ -272,14 +272,14 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr
272272
; CHECK-BE-NEXT: xxswapd vs5, vs2
273273
; CHECK-BE-NEXT: xxswapd vs6, vs1
274274
; CHECK-BE-NEXT: xxswapd vs7, vs0
275-
; CHECK-BE-NEXT: xscvdpuxws f3, f3
276-
; CHECK-BE-NEXT: xscvdpuxws f2, f2
277-
; CHECK-BE-NEXT: xscvdpuxws f1, f1
278-
; CHECK-BE-NEXT: xscvdpuxws f0, f0
279-
; CHECK-BE-NEXT: xscvdpuxws f4, f4
280-
; CHECK-BE-NEXT: xscvdpuxws f5, f5
281-
; CHECK-BE-NEXT: xscvdpuxws f6, f6
282-
; CHECK-BE-NEXT: xscvdpuxws f7, f7
275+
; CHECK-BE-NEXT: xscvdpsxws f3, f3
276+
; CHECK-BE-NEXT: xscvdpsxws f2, f2
277+
; CHECK-BE-NEXT: xscvdpsxws f1, f1
278+
; CHECK-BE-NEXT: xscvdpsxws f0, f0
279+
; CHECK-BE-NEXT: xscvdpsxws f4, f4
280+
; CHECK-BE-NEXT: xscvdpsxws f5, f5
281+
; CHECK-BE-NEXT: xscvdpsxws f6, f6
282+
; CHECK-BE-NEXT: xscvdpsxws f7, f7
283283
; CHECK-BE-NEXT: mfvsrwz r3, f3
284284
; CHECK-BE-NEXT: mfvsrwz r5, f2
285285
; CHECK-BE-NEXT: mfvsrwz r7, f1
@@ -329,60 +329,60 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou
329329
; CHECK-P8-NEXT: li r6, 48
330330
; CHECK-P8-NEXT: lxvd2x vs3, r4, r6
331331
; CHECK-P8-NEXT: li r6, 64
332-
; CHECK-P8-NEXT: xscvdpuxws f4, f0
332+
; CHECK-P8-NEXT: xscvdpsxws f4, f0
333333
; CHECK-P8-NEXT: lxvd2x vs5, r4, r6
334334
; CHECK-P8-NEXT: li r6, 80
335335
; CHECK-P8-NEXT: xxswapd vs0, vs0
336-
; CHECK-P8-NEXT: xscvdpuxws f6, f1
336+
; CHECK-P8-NEXT: xscvdpsxws f6, f1
337337
; CHECK-P8-NEXT: lxvd2x vs7, r4, r6
338338
; CHECK-P8-NEXT: li r6, 96
339339
; CHECK-P8-NEXT: xxswapd vs1, vs1
340-
; CHECK-P8-NEXT: xscvdpuxws f8, f2
340+
; CHECK-P8-NEXT: xscvdpsxws f8, f2
341341
; CHECK-P8-NEXT: lxvd2x vs9, r4, r6
342342
; CHECK-P8-NEXT: li r6, 112
343343
; CHECK-P8-NEXT: xxswapd vs2, vs2
344-
; CHECK-P8-NEXT: xscvdpuxws f10, f3
344+
; CHECK-P8-NEXT: xscvdpsxws f10, f3
345345
; CHECK-P8-NEXT: lxvd2x vs11, r4, r6
346346
; CHECK-P8-NEXT: xxswapd vs3, vs3
347-
; CHECK-P8-NEXT: xscvdpuxws f12, f5
347+
; CHECK-P8-NEXT: xscvdpsxws f12, f5
348348
; CHECK-P8-NEXT: xxswapd vs5, vs5
349-
; CHECK-P8-NEXT: xscvdpuxws f13, f7
349+
; CHECK-P8-NEXT: xscvdpsxws f13, f7
350350
; CHECK-P8-NEXT: xxswapd vs7, vs7
351-
; CHECK-P8-NEXT: xscvdpuxws v2, f9
351+
; CHECK-P8-NEXT: xscvdpsxws v2, f9
352352
; CHECK-P8-NEXT: xxswapd vs9, vs9
353353
; CHECK-P8-NEXT: mfvsrwz r4, f4
354-
; CHECK-P8-NEXT: xscvdpuxws v3, f11
354+
; CHECK-P8-NEXT: xscvdpsxws v3, f11
355355
; CHECK-P8-NEXT: xxswapd vs11, vs11
356-
; CHECK-P8-NEXT: xscvdpuxws f0, f0
356+
; CHECK-P8-NEXT: xscvdpsxws f0, f0
357357
; CHECK-P8-NEXT: mfvsrwz r6, f6
358358
; CHECK-P8-NEXT: mtvsrd f4, r4
359359
; CHECK-P8-NEXT: mfvsrwz r4, f8
360-
; CHECK-P8-NEXT: xscvdpuxws f1, f1
360+
; CHECK-P8-NEXT: xscvdpsxws f1, f1
361361
; CHECK-P8-NEXT: xxswapd v4, vs4
362-
; CHECK-P8-NEXT: xscvdpuxws f2, f2
362+
; CHECK-P8-NEXT: xscvdpsxws f2, f2
363363
; CHECK-P8-NEXT: mtvsrd f6, r6
364364
; CHECK-P8-NEXT: mfvsrwz r6, f10
365365
; CHECK-P8-NEXT: mtvsrd f8, r4
366366
; CHECK-P8-NEXT: xxswapd v5, vs6
367367
; CHECK-P8-NEXT: mfvsrwz r4, f12
368-
; CHECK-P8-NEXT: xscvdpuxws f5, f5
368+
; CHECK-P8-NEXT: xscvdpsxws f5, f5
369369
; CHECK-P8-NEXT: xxswapd v0, vs8
370370
; CHECK-P8-NEXT: mtvsrd f10, r6
371371
; CHECK-P8-NEXT: mfvsrwz r6, f13
372372
; CHECK-P8-NEXT: mtvsrd f12, r4
373373
; CHECK-P8-NEXT: xxswapd v1, vs10
374374
; CHECK-P8-NEXT: mfvsrwz r4, v2
375-
; CHECK-P8-NEXT: xscvdpuxws f3, f3
375+
; CHECK-P8-NEXT: xscvdpsxws f3, f3
376376
; CHECK-P8-NEXT: xxswapd v6, vs12
377-
; CHECK-P8-NEXT: xscvdpuxws f9, f9
377+
; CHECK-P8-NEXT: xscvdpsxws f9, f9
378378
; CHECK-P8-NEXT: mtvsrd f13, r6
379379
; CHECK-P8-NEXT: mfvsrwz r6, v3
380380
; CHECK-P8-NEXT: mtvsrd v2, r4
381381
; CHECK-P8-NEXT: xxswapd v7, vs13
382382
; CHECK-P8-NEXT: mfvsrwz r4, f0
383-
; CHECK-P8-NEXT: xscvdpuxws f7, f7
383+
; CHECK-P8-NEXT: xscvdpsxws f7, f7
384384
; CHECK-P8-NEXT: xxswapd v2, v2
385-
; CHECK-P8-NEXT: xscvdpuxws f11, f11
385+
; CHECK-P8-NEXT: xscvdpsxws f11, f11
386386
; CHECK-P8-NEXT: mtvsrd v3, r6
387387
; CHECK-P8-NEXT: mfvsrwz r6, f1
388388
; CHECK-P8-NEXT: mtvsrd f0, r4
@@ -450,22 +450,22 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou
450450
; CHECK-P9-NEXT: xxswapd vs13, vs3
451451
; CHECK-P9-NEXT: xxswapd v2, vs1
452452
; CHECK-P9-NEXT: xxswapd v3, vs0
453-
; CHECK-P9-NEXT: xscvdpuxws f6, f6
454-
; CHECK-P9-NEXT: xscvdpuxws f5, f5
455-
; CHECK-P9-NEXT: xscvdpuxws f4, f4
456-
; CHECK-P9-NEXT: xscvdpuxws f2, f2
457-
; CHECK-P9-NEXT: xscvdpuxws f7, f7
458-
; CHECK-P9-NEXT: xscvdpuxws f3, f3
459-
; CHECK-P9-NEXT: xscvdpuxws f1, f1
460-
; CHECK-P9-NEXT: xscvdpuxws f0, f0
461-
; CHECK-P9-NEXT: xscvdpuxws f8, f8
462-
; CHECK-P9-NEXT: xscvdpuxws f9, f9
463-
; CHECK-P9-NEXT: xscvdpuxws f10, f10
464-
; CHECK-P9-NEXT: xscvdpuxws f11, f11
465-
; CHECK-P9-NEXT: xscvdpuxws f12, f12
466-
; CHECK-P9-NEXT: xscvdpuxws f13, f13
467-
; CHECK-P9-NEXT: xscvdpuxws v2, v2
468-
; CHECK-P9-NEXT: xscvdpuxws v3, v3
453+
; CHECK-P9-NEXT: xscvdpsxws f6, f6
454+
; CHECK-P9-NEXT: xscvdpsxws f5, f5
455+
; CHECK-P9-NEXT: xscvdpsxws f4, f4
456+
; CHECK-P9-NEXT: xscvdpsxws f2, f2
457+
; CHECK-P9-NEXT: xscvdpsxws f7, f7
458+
; CHECK-P9-NEXT: xscvdpsxws f3, f3
459+
; CHECK-P9-NEXT: xscvdpsxws f1, f1
460+
; CHECK-P9-NEXT: xscvdpsxws f0, f0
461+
; CHECK-P9-NEXT: xscvdpsxws f8, f8
462+
; CHECK-P9-NEXT: xscvdpsxws f9, f9
463+
; CHECK-P9-NEXT: xscvdpsxws f10, f10
464+
; CHECK-P9-NEXT: xscvdpsxws f11, f11
465+
; CHECK-P9-NEXT: xscvdpsxws f12, f12
466+
; CHECK-P9-NEXT: xscvdpsxws f13, f13
467+
; CHECK-P9-NEXT: xscvdpsxws v2, v2
468+
; CHECK-P9-NEXT: xscvdpsxws v3, v3
469469
; CHECK-P9-NEXT: mfvsrwz r4, f6
470470
; CHECK-P9-NEXT: mfvsrwz r5, f5
471471
; CHECK-P9-NEXT: mfvsrwz r6, f4
@@ -562,22 +562,22 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou
562562
; CHECK-BE-NEXT: xxswapd vs13, vs3
563563
; CHECK-BE-NEXT: xxswapd v2, vs1
564564
; CHECK-BE-NEXT: xxswapd v3, vs0
565-
; CHECK-BE-NEXT: xscvdpuxws f6, f6
566-
; CHECK-BE-NEXT: xscvdpuxws f5, f5
567-
; CHECK-BE-NEXT: xscvdpuxws f4, f4
568-
; CHECK-BE-NEXT: xscvdpuxws f2, f2
569-
; CHECK-BE-NEXT: xscvdpuxws f7, f7
570-
; CHECK-BE-NEXT: xscvdpuxws f3, f3
571-
; CHECK-BE-NEXT: xscvdpuxws f1, f1
572-
; CHECK-BE-NEXT: xscvdpuxws f0, f0
573-
; CHECK-BE-NEXT: xscvdpuxws f8, f8
574-
; CHECK-BE-NEXT: xscvdpuxws f9, f9
575-
; CHECK-BE-NEXT: xscvdpuxws f10, f10
576-
; CHECK-BE-NEXT: xscvdpuxws f11, f11
577-
; CHECK-BE-NEXT: xscvdpuxws f12, f12
578-
; CHECK-BE-NEXT: xscvdpuxws f13, f13
579-
; CHECK-BE-NEXT: xscvdpuxws v2, v2
580-
; CHECK-BE-NEXT: xscvdpuxws v3, v3
565+
; CHECK-BE-NEXT: xscvdpsxws f6, f6
566+
; CHECK-BE-NEXT: xscvdpsxws f5, f5
567+
; CHECK-BE-NEXT: xscvdpsxws f4, f4
568+
; CHECK-BE-NEXT: xscvdpsxws f2, f2
569+
; CHECK-BE-NEXT: xscvdpsxws f7, f7
570+
; CHECK-BE-NEXT: xscvdpsxws f3, f3
571+
; CHECK-BE-NEXT: xscvdpsxws f1, f1
572+
; CHECK-BE-NEXT: xscvdpsxws f0, f0
573+
; CHECK-BE-NEXT: xscvdpsxws f8, f8
574+
; CHECK-BE-NEXT: xscvdpsxws f9, f9
575+
; CHECK-BE-NEXT: xscvdpsxws f10, f10
576+
; CHECK-BE-NEXT: xscvdpsxws f11, f11
577+
; CHECK-BE-NEXT: xscvdpsxws f12, f12
578+
; CHECK-BE-NEXT: xscvdpsxws f13, f13
579+
; CHECK-BE-NEXT: xscvdpsxws v2, v2
580+
; CHECK-BE-NEXT: xscvdpsxws v3, v3
581581
; CHECK-BE-NEXT: mfvsrwz r4, f6
582582
; CHECK-BE-NEXT: mfvsrwz r5, f5
583583
; CHECK-BE-NEXT: mfvsrwz r6, f4

‎llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll

+48-48
Original file line numberDiff line numberDiff line change
@@ -343,60 +343,60 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
343343
; CHECK-P8-NEXT: li r4, 48
344344
; CHECK-P8-NEXT: lxvd2x vs3, r3, r4
345345
; CHECK-P8-NEXT: li r4, 64
346-
; CHECK-P8-NEXT: xscvdpuxws f4, f0
346+
; CHECK-P8-NEXT: xscvdpsxws f4, f0
347347
; CHECK-P8-NEXT: xxswapd vs0, vs0
348348
; CHECK-P8-NEXT: lxvd2x vs5, r3, r4
349349
; CHECK-P8-NEXT: li r4, 80
350-
; CHECK-P8-NEXT: xscvdpuxws f6, f1
350+
; CHECK-P8-NEXT: xscvdpsxws f6, f1
351351
; CHECK-P8-NEXT: xxswapd vs1, vs1
352352
; CHECK-P8-NEXT: lxvd2x vs7, r3, r4
353353
; CHECK-P8-NEXT: li r4, 96
354-
; CHECK-P8-NEXT: xscvdpuxws f8, f2
354+
; CHECK-P8-NEXT: xscvdpsxws f8, f2
355355
; CHECK-P8-NEXT: xxswapd vs2, vs2
356356
; CHECK-P8-NEXT: lxvd2x vs9, r3, r4
357357
; CHECK-P8-NEXT: li r4, 112
358-
; CHECK-P8-NEXT: xscvdpuxws f10, f3
358+
; CHECK-P8-NEXT: xscvdpsxws f10, f3
359359
; CHECK-P8-NEXT: xxswapd vs3, vs3
360360
; CHECK-P8-NEXT: lxvd2x vs11, r3, r4
361-
; CHECK-P8-NEXT: xscvdpuxws f12, f5
361+
; CHECK-P8-NEXT: xscvdpsxws f12, f5
362362
; CHECK-P8-NEXT: xxswapd vs5, vs5
363-
; CHECK-P8-NEXT: xscvdpuxws f13, f7
363+
; CHECK-P8-NEXT: xscvdpsxws f13, f7
364364
; CHECK-P8-NEXT: xxswapd vs7, vs7
365-
; CHECK-P8-NEXT: xscvdpuxws v2, f9
365+
; CHECK-P8-NEXT: xscvdpsxws v2, f9
366366
; CHECK-P8-NEXT: xxswapd vs9, vs9
367367
; CHECK-P8-NEXT: mfvsrwz r3, f4
368-
; CHECK-P8-NEXT: xscvdpuxws v3, f11
368+
; CHECK-P8-NEXT: xscvdpsxws v3, f11
369369
; CHECK-P8-NEXT: xxswapd vs11, vs11
370370
; CHECK-P8-NEXT: mfvsrwz r4, f6
371-
; CHECK-P8-NEXT: xscvdpuxws f0, f0
371+
; CHECK-P8-NEXT: xscvdpsxws f0, f0
372372
; CHECK-P8-NEXT: mtvsrd f4, r3
373373
; CHECK-P8-NEXT: mfvsrwz r3, f8
374-
; CHECK-P8-NEXT: xscvdpuxws f1, f1
374+
; CHECK-P8-NEXT: xscvdpsxws f1, f1
375375
; CHECK-P8-NEXT: xxswapd v4, vs4
376376
; CHECK-P8-NEXT: mtvsrd f6, r4
377377
; CHECK-P8-NEXT: mfvsrwz r4, f10
378-
; CHECK-P8-NEXT: xscvdpuxws f2, f2
378+
; CHECK-P8-NEXT: xscvdpsxws f2, f2
379379
; CHECK-P8-NEXT: xxswapd v5, vs6
380380
; CHECK-P8-NEXT: mtvsrd f8, r3
381381
; CHECK-P8-NEXT: mfvsrwz r3, f12
382-
; CHECK-P8-NEXT: xscvdpuxws f3, f3
382+
; CHECK-P8-NEXT: xscvdpsxws f3, f3
383383
; CHECK-P8-NEXT: xxswapd v0, vs8
384384
; CHECK-P8-NEXT: mtvsrd f10, r4
385385
; CHECK-P8-NEXT: mfvsrwz r4, f13
386-
; CHECK-P8-NEXT: xscvdpuxws f5, f5
386+
; CHECK-P8-NEXT: xscvdpsxws f5, f5
387387
; CHECK-P8-NEXT: xxswapd v1, vs10
388388
; CHECK-P8-NEXT: mtvsrd f12, r3
389389
; CHECK-P8-NEXT: mfvsrwz r3, v2
390-
; CHECK-P8-NEXT: xscvdpuxws f7, f7
390+
; CHECK-P8-NEXT: xscvdpsxws f7, f7
391391
; CHECK-P8-NEXT: xxswapd v6, vs12
392392
; CHECK-P8-NEXT: mtvsrd f13, r4
393393
; CHECK-P8-NEXT: mfvsrwz r4, v3
394394
; CHECK-P8-NEXT: mtvsrd v2, r3
395395
; CHECK-P8-NEXT: xxswapd v7, vs13
396396
; CHECK-P8-NEXT: mfvsrwz r3, f0
397-
; CHECK-P8-NEXT: xscvdpuxws f9, f9
397+
; CHECK-P8-NEXT: xscvdpsxws f9, f9
398398
; CHECK-P8-NEXT: xxswapd v2, v2
399-
; CHECK-P8-NEXT: xscvdpuxws f11, f11
399+
; CHECK-P8-NEXT: xscvdpsxws f11, f11
400400
; CHECK-P8-NEXT: mtvsrd v3, r4
401401
; CHECK-P8-NEXT: mfvsrwz r4, f1
402402
; CHECK-P8-NEXT: mtvsrd f0, r3
@@ -462,22 +462,22 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
462462
; CHECK-P9-NEXT: xxswapd vs13, vs6
463463
; CHECK-P9-NEXT: xxswapd v2, vs1
464464
; CHECK-P9-NEXT: xxswapd v3, vs0
465-
; CHECK-P9-NEXT: xscvdpuxws f5, f5
466-
; CHECK-P9-NEXT: xscvdpuxws f4, f4
467-
; CHECK-P9-NEXT: xscvdpuxws f3, f3
468-
; CHECK-P9-NEXT: xscvdpuxws f2, f2
469-
; CHECK-P9-NEXT: xscvdpuxws f7, f7
470-
; CHECK-P9-NEXT: xscvdpuxws f6, f6
471-
; CHECK-P9-NEXT: xscvdpuxws f1, f1
472-
; CHECK-P9-NEXT: xscvdpuxws f0, f0
473-
; CHECK-P9-NEXT: xscvdpuxws f8, f8
474-
; CHECK-P9-NEXT: xscvdpuxws f9, f9
475-
; CHECK-P9-NEXT: xscvdpuxws f10, f10
476-
; CHECK-P9-NEXT: xscvdpuxws f11, f11
477-
; CHECK-P9-NEXT: xscvdpuxws f12, f12
478-
; CHECK-P9-NEXT: xscvdpuxws f13, f13
479-
; CHECK-P9-NEXT: xscvdpuxws v2, v2
480-
; CHECK-P9-NEXT: xscvdpuxws v3, v3
465+
; CHECK-P9-NEXT: xscvdpsxws f5, f5
466+
; CHECK-P9-NEXT: xscvdpsxws f4, f4
467+
; CHECK-P9-NEXT: xscvdpsxws f3, f3
468+
; CHECK-P9-NEXT: xscvdpsxws f2, f2
469+
; CHECK-P9-NEXT: xscvdpsxws f7, f7
470+
; CHECK-P9-NEXT: xscvdpsxws f6, f6
471+
; CHECK-P9-NEXT: xscvdpsxws f1, f1
472+
; CHECK-P9-NEXT: xscvdpsxws f0, f0
473+
; CHECK-P9-NEXT: xscvdpsxws f8, f8
474+
; CHECK-P9-NEXT: xscvdpsxws f9, f9
475+
; CHECK-P9-NEXT: xscvdpsxws f10, f10
476+
; CHECK-P9-NEXT: xscvdpsxws f11, f11
477+
; CHECK-P9-NEXT: xscvdpsxws f12, f12
478+
; CHECK-P9-NEXT: xscvdpsxws f13, f13
479+
; CHECK-P9-NEXT: xscvdpsxws v2, v2
480+
; CHECK-P9-NEXT: xscvdpsxws v3, v3
481481
; CHECK-P9-NEXT: mfvsrwz r3, f5
482482
; CHECK-P9-NEXT: mfvsrwz r4, f4
483483
; CHECK-P9-NEXT: mfvsrwz r5, f3
@@ -571,22 +571,22 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add
571571
; CHECK-BE-NEXT: xxswapd vs13, vs6
572572
; CHECK-BE-NEXT: xxswapd v2, vs1
573573
; CHECK-BE-NEXT: xxswapd v3, vs0
574-
; CHECK-BE-NEXT: xscvdpuxws f5, f5
575-
; CHECK-BE-NEXT: xscvdpuxws f4, f4
576-
; CHECK-BE-NEXT: xscvdpuxws f3, f3
577-
; CHECK-BE-NEXT: xscvdpuxws f2, f2
578-
; CHECK-BE-NEXT: xscvdpuxws f7, f7
579-
; CHECK-BE-NEXT: xscvdpuxws f6, f6
580-
; CHECK-BE-NEXT: xscvdpuxws f1, f1
581-
; CHECK-BE-NEXT: xscvdpuxws f0, f0
582-
; CHECK-BE-NEXT: xscvdpuxws f8, f8
583-
; CHECK-BE-NEXT: xscvdpuxws f9, f9
584-
; CHECK-BE-NEXT: xscvdpuxws f10, f10
585-
; CHECK-BE-NEXT: xscvdpuxws f11, f11
586-
; CHECK-BE-NEXT: xscvdpuxws f12, f12
587-
; CHECK-BE-NEXT: xscvdpuxws f13, f13
588-
; CHECK-BE-NEXT: xscvdpuxws v2, v2
589-
; CHECK-BE-NEXT: xscvdpuxws v3, v3
574+
; CHECK-BE-NEXT: xscvdpsxws f5, f5
575+
; CHECK-BE-NEXT: xscvdpsxws f4, f4
576+
; CHECK-BE-NEXT: xscvdpsxws f3, f3
577+
; CHECK-BE-NEXT: xscvdpsxws f2, f2
578+
; CHECK-BE-NEXT: xscvdpsxws f7, f7
579+
; CHECK-BE-NEXT: xscvdpsxws f6, f6
580+
; CHECK-BE-NEXT: xscvdpsxws f1, f1
581+
; CHECK-BE-NEXT: xscvdpsxws f0, f0
582+
; CHECK-BE-NEXT: xscvdpsxws f8, f8
583+
; CHECK-BE-NEXT: xscvdpsxws f9, f9
584+
; CHECK-BE-NEXT: xscvdpsxws f10, f10
585+
; CHECK-BE-NEXT: xscvdpsxws f11, f11
586+
; CHECK-BE-NEXT: xscvdpsxws f12, f12
587+
; CHECK-BE-NEXT: xscvdpsxws f13, f13
588+
; CHECK-BE-NEXT: xscvdpsxws v2, v2
589+
; CHECK-BE-NEXT: xscvdpsxws v3, v3
590590
; CHECK-BE-NEXT: mfvsrwz r3, f5
591591
; CHECK-BE-NEXT: mfvsrwz r4, f4
592592
; CHECK-BE-NEXT: mfvsrwz r5, f3

‎llvm/test/CodeGen/X86/vec_cast2.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ define <8 x i8> @cvt_v8f32_v8u8(<8 x float> %src) {
245245
; CHECK-WIDE: ## %bb.0:
246246
; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
247247
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
248-
; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
248+
; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
249249
; CHECK-WIDE-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
250250
; CHECK-WIDE-NEXT: vzeroupper
251251
; CHECK-WIDE-NEXT: retl

‎llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll

+49-150
Original file line numberDiff line numberDiff line change
@@ -2444,40 +2444,22 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
24442444
define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
24452445
; SSE-LABEL: fptosi_8f64_to_8i16:
24462446
; SSE: # %bb.0:
2447+
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2448+
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2449+
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
24472450
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
24482451
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2449-
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2450-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2451-
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,1,3,4,5,6,7]
2452-
; SSE-NEXT: cvttpd2dq %xmm3, %xmm0
2453-
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2454-
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2455-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,2,0]
2456-
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
2457-
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2452+
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2453+
; SSE-NEXT: packssdw %xmm2, %xmm0
24582454
; SSE-NEXT: retq
24592455
;
2460-
; AVX1-LABEL: fptosi_8f64_to_8i16:
2461-
; AVX1: # %bb.0:
2462-
; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2463-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2464-
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2465-
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2466-
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2467-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2468-
; AVX1-NEXT: vzeroupper
2469-
; AVX1-NEXT: retq
2470-
;
2471-
; AVX2-LABEL: fptosi_8f64_to_8i16:
2472-
; AVX2: # %bb.0:
2473-
; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2474-
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2475-
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2476-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2477-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2478-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2479-
; AVX2-NEXT: vzeroupper
2480-
; AVX2-NEXT: retq
2456+
; VEX-LABEL: fptosi_8f64_to_8i16:
2457+
; VEX: # %bb.0:
2458+
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2459+
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2460+
; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2461+
; VEX-NEXT: vzeroupper
2462+
; VEX-NEXT: retq
24812463
;
24822464
; AVX512F-LABEL: fptosi_8f64_to_8i16:
24832465
; AVX512F: # %bb.0:
@@ -2515,89 +2497,28 @@ define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
25152497
define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
25162498
; SSE-LABEL: fptoui_8f64_to_8i16:
25172499
; SSE: # %bb.0:
2518-
; SSE-NEXT: cvttsd2si %xmm3, %rax
2519-
; SSE-NEXT: movd %eax, %xmm4
2520-
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
2521-
; SSE-NEXT: cvttsd2si %xmm3, %rax
2522-
; SSE-NEXT: movd %eax, %xmm3
2523-
; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2524-
; SSE-NEXT: cvttsd2si %xmm2, %rax
2525-
; SSE-NEXT: movd %eax, %xmm3
2526-
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
2527-
; SSE-NEXT: cvttsd2si %xmm2, %rax
2528-
; SSE-NEXT: movd %eax, %xmm2
2529-
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2530-
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
2531-
; SSE-NEXT: cvttsd2si %xmm1, %rax
2532-
; SSE-NEXT: movd %eax, %xmm2
2533-
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
2534-
; SSE-NEXT: cvttsd2si %xmm1, %rax
2535-
; SSE-NEXT: movd %eax, %xmm1
2536-
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2537-
; SSE-NEXT: cvttsd2si %xmm0, %rax
2538-
; SSE-NEXT: movd %eax, %xmm1
2539-
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2540-
; SSE-NEXT: cvttsd2si %xmm0, %rax
2541-
; SSE-NEXT: movd %eax, %xmm0
2542-
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2543-
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2544-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
2545-
; SSE-NEXT: movdqa %xmm1, %xmm0
2500+
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2501+
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2502+
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2503+
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2504+
; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2505+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2506+
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2507+
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2508+
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2509+
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2510+
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
2511+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2512+
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
25462513
; SSE-NEXT: retq
25472514
;
2548-
; AVX1-LABEL: fptoui_8f64_to_8i16:
2549-
; AVX1: # %bb.0:
2550-
; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2551-
; AVX1-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2552-
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
2553-
; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2554-
; AVX1-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2555-
; AVX1-NEXT: vcvttpd2dq %ymm4, %xmm4
2556-
; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2557-
; AVX1-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2558-
; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2559-
; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2560-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2561-
; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2562-
; AVX1-NEXT: vcmpltpd %ymm2, %ymm0, %ymm4
2563-
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
2564-
; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
2565-
; AVX1-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2566-
; AVX1-NEXT: vcvttpd2dq %ymm2, %xmm2
2567-
; AVX1-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2568-
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2569-
; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm2, %xmm0
2570-
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2571-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2572-
; AVX1-NEXT: vzeroupper
2573-
; AVX1-NEXT: retq
2574-
;
2575-
; AVX2-LABEL: fptoui_8f64_to_8i16:
2576-
; AVX2: # %bb.0:
2577-
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2578-
; AVX2-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2579-
; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2580-
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2581-
; AVX2-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2582-
; AVX2-NEXT: vcvttpd2dq %ymm4, %xmm4
2583-
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2584-
; AVX2-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2585-
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2586-
; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2587-
; AVX2-NEXT: vcmpltpd %ymm2, %ymm0, %ymm3
2588-
; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2589-
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2590-
; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2591-
; AVX2-NEXT: vcvttpd2dq %ymm2, %xmm2
2592-
; AVX2-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2593-
; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2594-
; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
2595-
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2596-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2597-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2598-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2599-
; AVX2-NEXT: vzeroupper
2600-
; AVX2-NEXT: retq
2515+
; VEX-LABEL: fptoui_8f64_to_8i16:
2516+
; VEX: # %bb.0:
2517+
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2518+
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2519+
; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2520+
; VEX-NEXT: vzeroupper
2521+
; VEX-NEXT: retq
26012522
;
26022523
; AVX512F-LABEL: fptoui_8f64_to_8i16:
26032524
; AVX512F: # %bb.0:
@@ -2636,31 +2557,23 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
26362557
; SSE-LABEL: fptosi_16f32_to_16i8:
26372558
; SSE: # %bb.0:
26382559
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2639-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
2640-
; SSE-NEXT: pand %xmm4, %xmm3
26412560
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2642-
; SSE-NEXT: pand %xmm4, %xmm2
2643-
; SSE-NEXT: packuswb %xmm3, %xmm2
2561+
; SSE-NEXT: packssdw %xmm3, %xmm2
26442562
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2645-
; SSE-NEXT: pand %xmm4, %xmm1
26462563
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2647-
; SSE-NEXT: pand %xmm4, %xmm0
2648-
; SSE-NEXT: packuswb %xmm1, %xmm0
2649-
; SSE-NEXT: packuswb %xmm2, %xmm0
2564+
; SSE-NEXT: packssdw %xmm1, %xmm0
2565+
; SSE-NEXT: packsswb %xmm2, %xmm0
26502566
; SSE-NEXT: retq
26512567
;
26522568
; AVX1-LABEL: fptosi_16f32_to_16i8:
26532569
; AVX1: # %bb.0:
26542570
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
26552571
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
26562572
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2657-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2658-
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
26592573
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2660-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2661-
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2662-
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
2663-
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2574+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2575+
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2576+
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
26642577
; AVX1-NEXT: vzeroupper
26652578
; AVX1-NEXT: retq
26662579
;
@@ -2669,13 +2582,10 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
26692582
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
26702583
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
26712584
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2672-
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2673-
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
26742585
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2675-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2676-
; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2677-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
2678-
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2586+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2587+
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2588+
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
26792589
; AVX2-NEXT: vzeroupper
26802590
; AVX2-NEXT: retq
26812591
;
@@ -2693,30 +2603,22 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
26932603
; SSE-LABEL: fptoui_16f32_to_16i8:
26942604
; SSE: # %bb.0:
26952605
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2696-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
2697-
; SSE-NEXT: pand %xmm4, %xmm3
26982606
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2699-
; SSE-NEXT: pand %xmm4, %xmm2
2700-
; SSE-NEXT: packuswb %xmm3, %xmm2
2607+
; SSE-NEXT: packssdw %xmm3, %xmm2
27012608
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2702-
; SSE-NEXT: pand %xmm4, %xmm1
27032609
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2704-
; SSE-NEXT: pand %xmm4, %xmm0
2705-
; SSE-NEXT: packuswb %xmm1, %xmm0
2610+
; SSE-NEXT: packssdw %xmm1, %xmm0
27062611
; SSE-NEXT: packuswb %xmm2, %xmm0
27072612
; SSE-NEXT: retq
27082613
;
27092614
; AVX1-LABEL: fptoui_16f32_to_16i8:
27102615
; AVX1: # %bb.0:
27112616
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
27122617
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2713-
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
2714-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2715-
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
2618+
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
27162619
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2717-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2718-
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
2719-
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
2620+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2621+
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
27202622
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
27212623
; AVX1-NEXT: vzeroupper
27222624
; AVX1-NEXT: retq
@@ -2725,13 +2627,10 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
27252627
; AVX2: # %bb.0:
27262628
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
27272629
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2728-
; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
2729-
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2730-
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
2630+
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
27312631
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
2732-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2733-
; AVX2-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
2734-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
2632+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2633+
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
27352634
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
27362635
; AVX2-NEXT: vzeroupper
27372636
; AVX2-NEXT: retq

‎llvm/test/CodeGen/X86/vec_fp_to_int.ll

+45-203
Original file line numberDiff line numberDiff line change
@@ -2726,40 +2726,22 @@ define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
27262726
define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
27272727
; SSE-LABEL: fptosi_8f64_to_8i16:
27282728
; SSE: # %bb.0:
2729+
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2730+
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2731+
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
27292732
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
27302733
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2731-
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2732-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2733-
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,1,3,4,5,6,7]
2734-
; SSE-NEXT: cvttpd2dq %xmm3, %xmm0
2735-
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2736-
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2737-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,2,0]
2738-
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
2739-
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
2734+
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2735+
; SSE-NEXT: packssdw %xmm2, %xmm0
27402736
; SSE-NEXT: retq
27412737
;
2742-
; AVX1-LABEL: fptosi_8f64_to_8i16:
2743-
; AVX1: # %bb.0:
2744-
; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2745-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2746-
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2747-
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2748-
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2749-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2750-
; AVX1-NEXT: vzeroupper
2751-
; AVX1-NEXT: retq
2752-
;
2753-
; AVX2-LABEL: fptosi_8f64_to_8i16:
2754-
; AVX2: # %bb.0:
2755-
; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2756-
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2757-
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2758-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2759-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2760-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2761-
; AVX2-NEXT: vzeroupper
2762-
; AVX2-NEXT: retq
2738+
; VEX-LABEL: fptosi_8f64_to_8i16:
2739+
; VEX: # %bb.0:
2740+
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2741+
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2742+
; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
2743+
; VEX-NEXT: vzeroupper
2744+
; VEX-NEXT: retq
27632745
;
27642746
; AVX512F-LABEL: fptosi_8f64_to_8i16:
27652747
; AVX512F: # %bb.0:
@@ -2797,146 +2779,28 @@ define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) {
27972779
define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) {
27982780
; SSE-LABEL: fptoui_8f64_to_8i16:
27992781
; SSE: # %bb.0:
2800-
; SSE-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
2801-
; SSE-NEXT: movapd %xmm1, %xmm5
2802-
; SSE-NEXT: subsd %xmm4, %xmm5
2803-
; SSE-NEXT: cvttsd2si %xmm5, %rcx
2804-
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
2805-
; SSE-NEXT: xorq %rax, %rcx
2806-
; SSE-NEXT: cvttsd2si %xmm1, %rdx
2807-
; SSE-NEXT: ucomisd %xmm4, %xmm1
2808-
; SSE-NEXT: cmovaeq %rcx, %rdx
2809-
; SSE-NEXT: movq %rdx, %xmm5
2810-
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
2811-
; SSE-NEXT: movapd %xmm1, %xmm6
2812-
; SSE-NEXT: subsd %xmm4, %xmm6
2813-
; SSE-NEXT: cvttsd2si %xmm6, %rcx
2814-
; SSE-NEXT: xorq %rax, %rcx
2815-
; SSE-NEXT: cvttsd2si %xmm1, %rdx
2816-
; SSE-NEXT: ucomisd %xmm4, %xmm1
2817-
; SSE-NEXT: cmovaeq %rcx, %rdx
2818-
; SSE-NEXT: movq %rdx, %xmm1
2819-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm1[0]
2820-
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
2821-
; SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,2,2,3,4,5,6,7]
2822-
; SSE-NEXT: movapd %xmm0, %xmm1
2823-
; SSE-NEXT: subsd %xmm4, %xmm1
2824-
; SSE-NEXT: cvttsd2si %xmm1, %rcx
2825-
; SSE-NEXT: xorq %rax, %rcx
2826-
; SSE-NEXT: cvttsd2si %xmm0, %rdx
2827-
; SSE-NEXT: ucomisd %xmm4, %xmm0
2828-
; SSE-NEXT: cmovaeq %rcx, %rdx
2829-
; SSE-NEXT: movq %rdx, %xmm1
2830-
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
2831-
; SSE-NEXT: movapd %xmm0, %xmm6
2832-
; SSE-NEXT: subsd %xmm4, %xmm6
2833-
; SSE-NEXT: cvttsd2si %xmm6, %rcx
2834-
; SSE-NEXT: xorq %rax, %rcx
2835-
; SSE-NEXT: cvttsd2si %xmm0, %rdx
2836-
; SSE-NEXT: ucomisd %xmm4, %xmm0
2837-
; SSE-NEXT: cmovaeq %rcx, %rdx
2838-
; SSE-NEXT: movq %rdx, %xmm0
2839-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2840-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
2841-
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
2842-
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
2843-
; SSE-NEXT: movapd %xmm3, %xmm0
2844-
; SSE-NEXT: subsd %xmm4, %xmm0
2845-
; SSE-NEXT: cvttsd2si %xmm0, %rcx
2846-
; SSE-NEXT: xorq %rax, %rcx
2847-
; SSE-NEXT: cvttsd2si %xmm3, %rdx
2848-
; SSE-NEXT: ucomisd %xmm4, %xmm3
2849-
; SSE-NEXT: cmovaeq %rcx, %rdx
2850-
; SSE-NEXT: movq %rdx, %xmm0
2851-
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
2852-
; SSE-NEXT: movapd %xmm3, %xmm5
2853-
; SSE-NEXT: subsd %xmm4, %xmm5
2854-
; SSE-NEXT: cvttsd2si %xmm5, %rcx
2855-
; SSE-NEXT: xorq %rax, %rcx
2856-
; SSE-NEXT: cvttsd2si %xmm3, %rdx
2857-
; SSE-NEXT: ucomisd %xmm4, %xmm3
2858-
; SSE-NEXT: cmovaeq %rcx, %rdx
2859-
; SSE-NEXT: movq %rdx, %xmm3
2860-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
2782+
; SSE-NEXT: cvttpd2dq %xmm3, %xmm3
2783+
; SSE-NEXT: cvttpd2dq %xmm2, %xmm2
2784+
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
2785+
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
2786+
; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
2787+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2788+
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
2789+
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
2790+
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2791+
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
2792+
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
28612793
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2862-
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,1,0,2,4,5,6,7]
2863-
; SSE-NEXT: movapd %xmm2, %xmm0
2864-
; SSE-NEXT: subsd %xmm4, %xmm0
2865-
; SSE-NEXT: cvttsd2si %xmm0, %rcx
2866-
; SSE-NEXT: xorq %rax, %rcx
2867-
; SSE-NEXT: cvttsd2si %xmm2, %rdx
2868-
; SSE-NEXT: ucomisd %xmm4, %xmm2
2869-
; SSE-NEXT: cmovaeq %rcx, %rdx
2870-
; SSE-NEXT: movq %rdx, %xmm0
2871-
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
2872-
; SSE-NEXT: movapd %xmm2, %xmm5
2873-
; SSE-NEXT: subsd %xmm4, %xmm5
2874-
; SSE-NEXT: cvttsd2si %xmm5, %rcx
2875-
; SSE-NEXT: xorq %rax, %rcx
2876-
; SSE-NEXT: cvttsd2si %xmm2, %rax
2877-
; SSE-NEXT: ucomisd %xmm4, %xmm2
2878-
; SSE-NEXT: cmovaeq %rcx, %rax
2879-
; SSE-NEXT: movq %rax, %xmm2
28802794
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2881-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2882-
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
2883-
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
2884-
; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
28852795
; SSE-NEXT: retq
28862796
;
2887-
; AVX1-LABEL: fptoui_8f64_to_8i16:
2888-
; AVX1: # %bb.0:
2889-
; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2890-
; AVX1-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2891-
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
2892-
; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2893-
; AVX1-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2894-
; AVX1-NEXT: vcvttpd2dq %ymm4, %xmm4
2895-
; AVX1-NEXT: vmovapd {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2896-
; AVX1-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2897-
; AVX1-NEXT: vcvttpd2dq %ymm1, %xmm1
2898-
; AVX1-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2899-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
2900-
; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
2901-
; AVX1-NEXT: vcmpltpd %ymm2, %ymm0, %ymm4
2902-
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
2903-
; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
2904-
; AVX1-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2905-
; AVX1-NEXT: vcvttpd2dq %ymm2, %xmm2
2906-
; AVX1-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2907-
; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0
2908-
; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm2, %xmm0
2909-
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2910-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2911-
; AVX1-NEXT: vzeroupper
2912-
; AVX1-NEXT: retq
2913-
;
2914-
; AVX2-LABEL: fptoui_8f64_to_8i16:
2915-
; AVX2: # %bb.0:
2916-
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9]
2917-
; AVX2-NEXT: vcmpltpd %ymm2, %ymm1, %ymm3
2918-
; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2919-
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2920-
; AVX2-NEXT: vsubpd %ymm2, %ymm1, %ymm4
2921-
; AVX2-NEXT: vcvttpd2dq %ymm4, %xmm4
2922-
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
2923-
; AVX2-NEXT: vxorpd %xmm5, %xmm4, %xmm4
2924-
; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1
2925-
; AVX2-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm1
2926-
; AVX2-NEXT: vcmpltpd %ymm2, %ymm0, %ymm3
2927-
; AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4
2928-
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
2929-
; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm2
2930-
; AVX2-NEXT: vcvttpd2dq %ymm2, %xmm2
2931-
; AVX2-NEXT: vxorpd %xmm5, %xmm2, %xmm2
2932-
; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0
2933-
; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
2934-
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2935-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
2936-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
2937-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2938-
; AVX2-NEXT: vzeroupper
2939-
; AVX2-NEXT: retq
2797+
; VEX-LABEL: fptoui_8f64_to_8i16:
2798+
; VEX: # %bb.0:
2799+
; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1
2800+
; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0
2801+
; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2802+
; VEX-NEXT: vzeroupper
2803+
; VEX-NEXT: retq
29402804
;
29412805
; AVX512F-LABEL: fptoui_8f64_to_8i16:
29422806
; AVX512F: # %bb.0:
@@ -2975,31 +2839,23 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
29752839
; SSE-LABEL: fptosi_16f32_to_16i8:
29762840
; SSE: # %bb.0:
29772841
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
2978-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
2979-
; SSE-NEXT: pand %xmm4, %xmm3
29802842
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
2981-
; SSE-NEXT: pand %xmm4, %xmm2
2982-
; SSE-NEXT: packuswb %xmm3, %xmm2
2843+
; SSE-NEXT: packssdw %xmm3, %xmm2
29832844
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
2984-
; SSE-NEXT: pand %xmm4, %xmm1
29852845
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
2986-
; SSE-NEXT: pand %xmm4, %xmm0
2987-
; SSE-NEXT: packuswb %xmm1, %xmm0
2988-
; SSE-NEXT: packuswb %xmm2, %xmm0
2846+
; SSE-NEXT: packssdw %xmm1, %xmm0
2847+
; SSE-NEXT: packsswb %xmm2, %xmm0
29892848
; SSE-NEXT: retq
29902849
;
29912850
; AVX1-LABEL: fptosi_16f32_to_16i8:
29922851
; AVX1: # %bb.0:
29932852
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
29942853
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
29952854
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2996-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
2997-
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
29982855
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
2999-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3000-
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
3001-
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
3002-
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2856+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2857+
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2858+
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
30032859
; AVX1-NEXT: vzeroupper
30042860
; AVX1-NEXT: retq
30052861
;
@@ -3008,13 +2864,10 @@ define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) {
30082864
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
30092865
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
30102866
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3011-
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3012-
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
30132867
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
3014-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
3015-
; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
3016-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
3017-
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
2868+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2869+
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2870+
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
30182871
; AVX2-NEXT: vzeroupper
30192872
; AVX2-NEXT: retq
30202873
;
@@ -3032,16 +2885,11 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
30322885
; SSE-LABEL: fptoui_16f32_to_16i8:
30332886
; SSE: # %bb.0:
30342887
; SSE-NEXT: cvttps2dq %xmm3, %xmm3
3035-
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
3036-
; SSE-NEXT: pand %xmm4, %xmm3
30372888
; SSE-NEXT: cvttps2dq %xmm2, %xmm2
3038-
; SSE-NEXT: pand %xmm4, %xmm2
3039-
; SSE-NEXT: packuswb %xmm3, %xmm2
2889+
; SSE-NEXT: packssdw %xmm3, %xmm2
30402890
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
3041-
; SSE-NEXT: pand %xmm4, %xmm1
30422891
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
3043-
; SSE-NEXT: pand %xmm4, %xmm0
3044-
; SSE-NEXT: packuswb %xmm1, %xmm0
2892+
; SSE-NEXT: packssdw %xmm1, %xmm0
30452893
; SSE-NEXT: packuswb %xmm2, %xmm0
30462894
; SSE-NEXT: retq
30472895
;
@@ -3050,12 +2898,9 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
30502898
; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1
30512899
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
30522900
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3053-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3054-
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
30552901
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
3056-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
3057-
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
3058-
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
2902+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2903+
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
30592904
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
30602905
; AVX1-NEXT: vzeroupper
30612906
; AVX1-NEXT: retq
@@ -3065,12 +2910,9 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) {
30652910
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
30662911
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
30672912
; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
3068-
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
3069-
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
30702913
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
3071-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
3072-
; AVX2-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
3073-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
2914+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
2915+
; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
30742916
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
30752917
; AVX2-NEXT: vzeroupper
30762918
; AVX2-NEXT: retq

0 commit comments

Comments
 (0)
Please sign in to comment.