Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5727,28 +5727,28 @@ // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) Index: test/CodeGen/SystemZ/fp-strict-conv-10.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-conv-10.ll +++ test/CodeGen/SystemZ/fp-strict-conv-10.ll @@ -18,19 +18,18 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f2, 0(%r1) -; CHECK-NEXT: ler %f1, %f0 -; CHECK-NEXT: sebr %f1, %f2 -; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: le %f1, 0(%r1) +; CHECK-NEXT: cebr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: jnl .LBB0_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzer %f1 ; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -44,19 +43,18 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f2, 0(%r1) -; CHECK-NEXT: ldr %f1, %f0 -; CHECK-NEXT: sdbr %f1, %f2 -; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: ldeb %f1, 0(%r1) +; CHECK-NEXT: cdbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: jnl .LBB1_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzdr %f1 ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -72,19 +70,18 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f4, 0(%r1) -; CHECK-NEXT: lxr %f1, %f0 -; CHECK-NEXT: sxbr %f1, %f4 -; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: lxeb %f1, 0(%r1) +; CHECK-NEXT: cxbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: jnl .LBB2_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzxr %f1 ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 Index: test/CodeGen/SystemZ/fp-strict-conv-12.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-conv-12.ll +++ test/CodeGen/SystemZ/fp-strict-conv-12.ll @@ -17,19 +17,18 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f2, 0(%r1) -; CHECK-NEXT: ler %f1, %f0 -; CHECK-NEXT: sebr %f1, %f2 -; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: le %f1, 0(%r1) +; CHECK-NEXT: cebr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: jnl .LBB0_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzer %f1 ; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cgebr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -43,19 +42,18 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f2, 0(%r1) -; CHECK-NEXT: ldr %f1, %f0 -; CHECK-NEXT: sdbr %f1, %f2 -; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: ldeb %f1, 0(%r1) +; CHECK-NEXT: cdbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: jnl .LBB1_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzdr %f1 ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cgdbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -71,19 +69,18 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f4, 0(%r1) -; CHECK-NEXT: lxr %f1, %f0 -; CHECK-NEXT: sxbr %f1, %f4 -; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: lxeb %f1, 0(%r1) +; CHECK-NEXT: cxbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: jnl .LBB2_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzxr %f1 ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cgxbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 Index: test/CodeGen/X86/fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/fp-intrinsics.ll +++ test/CodeGen/X86/fp-intrinsics.ll @@ -290,27 +290,23 @@ ; unknown. The expansion should have only one conversion instruction. ; Verify that no gross errors happen. ; CHECK-LABEL: @f20u -; NO-FMA: cmpltsd -; NO-FMA: movapd -; NO-FMA: andpd -; NO-FMA: xorl ; NO-FMA: ucomisd -; NO-FMA: subsd -; NO-FMA: andnpd -; NO-FMA: orpd -; NO-FMA: cvttsd2si ; NO-FMA: setae ; NO-FMA: shll +; NO-FMA: movapd +; NO-FMA: cmpltsd +; NO-FMA: andnpd +; NO-FMA: subsd +; NO-FMA: cvttsd2si ; NO-FMA: xorl ; -; HAS-FMA: vcmpltsd -; HAS-FMA: vsubsd -; HAS-FMA: vblendvpd -; HAS-FMA: vcvttsd2si -; HAS-FMA: xorl ; HAS-FMA: vucomisd ; HAS-FMA: setae ; HAS-FMA: shll +; HAS-FMA: vcmpltsd +; HAS-FMA: vandnpd +; HAS-FMA: vsubsd +; HAS-FMA: vcvttsd2si ; HAS-FMA: xorl define i32 @f20u(double %x) { entry: Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4296,12 +4296,18 @@ define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() { ; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %eax ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %eax ; AVX-NEXT: retq entry: %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32( @@ -4313,17 +4319,27 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() { ; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %eax ; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %eax +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %ecx ; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -4337,24 +4353,38 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() { ; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax ; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %eax +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm0 ; AVX-NEXT: retq entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( @@ -4367,29 +4397,47 @@ define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() { ; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %eax ; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm2 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %eax +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm3 +; CHECK-NEXT: cvttss2si %xmm3, %eax ; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %eax +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttss2si %xmm2, %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 ; AVX-NEXT: retq entry: %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32( @@ -4402,12 +4450,18 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %rax ; AVX-NEXT: retq entry: %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32( @@ -4419,20 +4473,30 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax ; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq entry: %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32( @@ -4444,21 +4508,35 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rdx -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rcx +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rax +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rdx +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: cvttss2si %xmm1, %rcx ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttss2si %xmm2, %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32( @@ -4471,31 +4549,49 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: cvttss2si {{.*}}(%rip), %rax +; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm1, %xmm3 +; CHECK-NEXT: cvttss2si %xmm3, %rax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttss2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttss2si %xmm2, %rax ; AVX-NEXT: vmovq %rax, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttss2si %xmm2, %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vcvttss2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq entry: %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32( @@ -4508,12 +4604,18 @@ define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() { ; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %eax ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %eax ; AVX-NEXT: retq entry: %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64( @@ -4525,17 +4627,27 @@ define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() { ; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %eax ; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %eax +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %ecx ; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -4549,24 +4661,38 @@ define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() { ; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: movd %eax, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %eax ; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %eax +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm0 ; AVX-NEXT: retq entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( @@ -4579,29 +4705,47 @@ define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() { ; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %eax ; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %eax ; CHECK-NEXT: movd %eax, %xmm2 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %eax +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm3 +; CHECK-NEXT: cvttsd2si %xmm3, %eax ; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %eax +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttsd2si %xmm2, %eax +; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %eax +; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 ; AVX-NEXT: retq entry: %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64( @@ -4614,12 +4758,18 @@ define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %rax ; AVX-NEXT: retq entry: %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64( @@ -4631,20 +4781,30 @@ define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax ; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm0 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq entry: %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64( @@ -4656,21 +4816,35 @@ define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rdx -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rcx +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rdx +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %rcx ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttsd2si %xmm2, %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64( @@ -4683,31 +4857,49 @@ define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rax +; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rax ; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: cvttsd2si {{.*}}(%rip), %rax +; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; CHECK-NEXT: subsd %xmm1, %xmm3 +; CHECK-NEXT: cvttsd2si %xmm3, %rax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm0 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax -; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vcvttsd2si %xmm1, %rax ; AVX-NEXT: vmovq %rax, %xmm1 -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttsd2si %xmm2, %rax ; AVX-NEXT: vmovq %rax, %xmm2 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX-NEXT: vcvttsd2si %xmm2, %rax +; AVX-NEXT: vmovq %rax, %xmm2 +; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; AVX-NEXT: vsubsd %xmm0, %xmm3, %xmm0 +; AVX-NEXT: vcvttsd2si %xmm0, %rax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq entry: %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(