diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2384,7 +2384,8 @@ std::swap(N0, N1); // TODO: Should this apply to scalar select too? - if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse()) + if ((N1.getOpcode() != ISD::VSELECT && N1.getOpcode() != ISD::SELECT) || + !N1.hasOneUse()) return SDValue(); // We can't hoist all instructions because of immediate UB (not speculatable). diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35995,12 +35995,12 @@ EVT VT) const { // TODO: This is too general. There are cases where pre-AVX512 codegen would // benefit. The transform may also be profitable for scalar code. - if (!Subtarget.hasAVX512()) - return false; - if (!Subtarget.hasVLX() && !VT.is512BitVector()) - return false; - if (!VT.isVector() || VT.getScalarType() == MVT::i1) - return false; + if (VT.isVector()) { + if (!Subtarget.hasAVX512()) + return false; + if (!Subtarget.hasVLX() && !VT.is512BitVector()) + return false; + } return true; } diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -378,23 +378,27 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: cmpeqz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csinc r0, r1, zr, ne -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: orreq r1, r1, #1 +; CHECK-NEXT: and r0, r1, #1 ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: vmov r0, r2, d3 ; CHECK-NEXT: orrs r0, r2 -; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csinc r0, r2, zr, ne +; CHECK-NEXT: it eq +; CHECK-NEXT: orreq r2, r2, #1 +; CHECK-NEXT: and r0, r2, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 @@ -417,21 +421,27 @@ ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: vmov r12, r2, d5 -; CHECK-NEXT: csinc r0, r0, zr, ne -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: cset r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: orreq r1, r1, #1 +; CHECK-NEXT: and r0, r1, #1 ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r3, r0, d3 ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: eor.w r2, r3, r12 ; CHECK-NEXT: orrs r0, r2 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: csinc r0, r0, zr, ne +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: orreq r2, r2, #1 +; CHECK-NEXT: and r0, r2, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -458,26 +458,28 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: cmpeqz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: eoreq r1, r1, #1 -; CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: and r0, r1, #1 +; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: vmov r0, r2, d3 ; CHECK-NEXT: orrs r0, r2 -; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: orrs r2, r3 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: eoreq r2, r2, #1 -; CHECK-NEXT: rsbs r0, r2, #0 +; CHECK-NEXT: and r0, r2, #1 +; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -499,23 +501,27 @@ ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 +; CHECK-NEXT: vmov r12, r2, d5 +; CHECK-NEXT: cset r1, eq +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: eoreq r0, r0, #1 +; CHECK-NEXT: eoreq r1, r1, #1 +; CHECK-NEXT: and r0, r1, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r12, r2, d5 ; CHECK-NEXT: vmov r3, r0, d3 ; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: eor.w r2, r3, r12 ; CHECK-NEXT: orrs r0, r2 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: cset r2, eq +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: eoreq r0, r0, #1 +; CHECK-NEXT: eoreq r2, r2, #1 +; CHECK-NEXT: and r0, r2, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -41,32 +41,32 @@ ; CHECK-NEXT: adds.w r12, r2, r0 ; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: adc.w lr, r3, r1 +; CHECK-NEXT: lsrs r1, r1, #31 ; CHECK-NEXT: subs.w r2, r12, r2 +; CHECK-NEXT: and r1, r1, #1 ; CHECK-NEXT: sbcs.w r2, lr, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: eormi r2, r2, #1 -; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r1, r1, #1 +; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: vmov r1, r3, d3 ; CHECK-NEXT: adds r1, r1, r0 ; CHECK-NEXT: adc.w r5, r4, r3 ; CHECK-NEXT: subs r0, r1, r0 +; CHECK-NEXT: lsr.w r3, r3, #31 ; CHECK-NEXT: sbcs.w r0, r5, r4 +; CHECK-NEXT: and r3, r3, #1 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: eormi r0, r0, #1 -; CHECK-NEXT: asr.w r1, lr, #31 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r3, r3, #1 +; CHECK-NEXT: rsbs r0, r3, #0 ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: asrs r0, r5, #31 -; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: asr.w r1, lr, #31 ; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: adr r0, .LCPI3_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] @@ -179,47 +179,49 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: ssub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: vmov r1, r0, d0 -; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: subs.w r12, r1, r2 -; CHECK-NEXT: sbc.w lr, r0, r3 -; CHECK-NEXT: subs.w r1, r12, r1 -; CHECK-NEXT: sbcs.w r0, lr, r0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov lr, r12, d2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r2, r1, d0 +; CHECK-NEXT: rsbs.w r3, lr, #0 +; CHECK-NEXT: sbcs.w r3, r0, r12 +; CHECK-NEXT: cset r3, lt +; CHECK-NEXT: subs.w lr, r2, lr +; CHECK-NEXT: sbc.w r12, r1, r12 +; CHECK-NEXT: subs.w r2, lr, r2 +; CHECK-NEXT: sbcs.w r1, r12, r1 ; CHECK-NEXT: it lt -; CHECK-NEXT: eorlt r0, r0, #1 +; CHECK-NEXT: eorlt r3, r3, #1 +; CHECK-NEXT: and r1, r3, #1 ; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: subs r6, r4, r2 -; CHECK-NEXT: sbc.w r7, r5, r3 -; CHECK-NEXT: subs r4, r6, r4 -; CHECK-NEXT: sbcs.w r4, r7, r5 -; CHECK-NEXT: vmov q0[2], q0[0], r12, r6 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: sbcs.w r2, r1, r3 -; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: rsbs r4, r2, #0 +; CHECK-NEXT: sbcs.w r4, r0, r3 +; CHECK-NEXT: bfi r0, r1, #0, #8 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: sbc.w r3, r5, r3 +; CHECK-NEXT: subs r4, r2, r4 +; CHECK-NEXT: sbcs.w r4, r3, r5 ; CHECK-NEXT: it lt -; CHECK-NEXT: eorlt r4, r4, #1 -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: bfi r1, r0, #8, #8 -; CHECK-NEXT: asrs r0, r7, #31 -; CHECK-NEXT: vmsr p0, r1 -; CHECK-NEXT: asr.w r1, lr, #31 +; CHECK-NEXT: eorlt r1, r1, #1 +; CHECK-NEXT: and r1, r1, #1 +; CHECK-NEXT: vmov q0[2], q0[0], lr, r2 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: vmov q0[3], q0[1], r12, r3 +; CHECK-NEXT: bfi r0, r1, #8, #8 +; CHECK-NEXT: asr.w r1, r12, #31 +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: asrs r0, r3, #31 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov q0[3], q0[1], lr, r7 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: adr r0, .LCPI11_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: veor q1, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI11_0: diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll --- a/llvm/test/CodeGen/X86/add-cmov.ll +++ b/llvm/test/CodeGen/X86/add-cmov.ll @@ -59,11 +59,11 @@ define i8 @select_consts_i8(i8 %offset, i1 %b) { ; CHECK-LABEL: select_consts_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 45(%rdi), %eax +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: movl $45, %eax -; CHECK-NEXT: cmovnel %ecx, %eax -; CHECK-NEXT: addb %dil, %al +; CHECK-NEXT: cmovnel %edi, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %s = select i1 %b, i8 0, i8 45 @@ -109,9 +109,10 @@ define i32 @select_0_1_i32(i32 %offset, i64 %x) { ; CHECK-LABEL: select_0_1_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %eax ; CHECK-NEXT: cmpq $42, %rsi -; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: cmovael %edi, %eax ; CHECK-NEXT: retq %b = icmp ugt i64 %x, 41 %s = select i1 %b, i32 0, i32 1 @@ -122,9 +123,10 @@ define i32 @select_1_0_i32(i32 %offset, i64 %x) { ; CHECK-LABEL: select_1_0_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 1(%rdi), %eax ; CHECK-NEXT: cmpq $42, %rsi -; CHECK-NEXT: sbbl $-1, %eax +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: retq %b = icmp ugt i64 %x, 41 %s = select i1 %b, i32 1, i32 0 diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1886,9 +1886,12 @@ ; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 ; X86-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $31, %eax +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; X86-NEXT: leal 4(%ecx), %edx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: cmovnsl %ecx, %edx ; X86-NEXT: fildll {{[0-9]+}}(%esp) -; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-NEXT: fadds (%edx) ; X86-NEXT: fstps {{[0-9]+}}(%esp) ; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -8,23 +8,19 @@ ; X86-LABEL: select00: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: je .LBB0_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: vmovdqa64 %zmm0, %zmm1 +; X86-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; X86-NEXT: .LBB0_2: -; X86-NEXT: vpxord %zmm1, %zmm0, %zmm0 ; X86-NEXT: retl ; ; X64-LABEL: select00: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: je .LBB0_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: vmovdqa64 %zmm0, %zmm1 +; X64-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; X64-NEXT: .LBB0_2: -; X64-NEXT: vpxord %zmm1, %zmm0, %zmm0 ; X64-NEXT: retq %cmpres = icmp eq i32 %a, 255 %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b @@ -36,23 +32,19 @@ ; X86-LABEL: select01: ; X86: # %bb.0: ; X86-NEXT: cmpl $255, {{[0-9]+}}(%esp) -; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: vmovdqa64 %zmm0, %zmm1 +; X86-NEXT: vpxorq %zmm0, %zmm0, %zmm0 ; X86-NEXT: .LBB1_2: -; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; X86-NEXT: retl ; ; X64-LABEL: select01: ; X64: # %bb.0: ; X64-NEXT: cmpl $255, %edi -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: je .LBB1_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: vmovdqa64 %zmm0, %zmm1 +; X64-NEXT: vpxorq %zmm0, %zmm0, %zmm0 ; X64-NEXT: .LBB1_2: -; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; X64-NEXT: retq %cmpres = icmp eq i32 %a, 255 %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -583,16 +583,13 @@ ; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpcmpgtd %ymm3, %ymm1, %k1 -; AVX512F-NEXT: testb $1, %dil -; AVX512F-NEXT: jne .LBB7_1 -; AVX512F-NEXT: # %bb.2: -; AVX512F-NEXT: kxorw %k0, %k0, %k2 -; AVX512F-NEXT: jmp .LBB7_3 -; AVX512F-NEXT: .LBB7_1: -; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm0, %k2 -; AVX512F-NEXT: .LBB7_3: ; AVX512F-NEXT: korw %k0, %k1, %k0 -; AVX512F-NEXT: korw %k2, %k0, %k0 +; AVX512F-NEXT: testb $1, %dil +; AVX512F-NEXT: je .LBB7_2 +; AVX512F-NEXT: # %bb.1: +; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm0, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: .LBB7_2: ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: # kill: def $al killed $al killed $eax ; AVX512F-NEXT: vzeroupper @@ -603,16 +600,13 @@ ; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpgtd %ymm3, %ymm1, %k1 -; AVX512BW-NEXT: testb $1, %dil -; AVX512BW-NEXT: jne .LBB7_1 -; AVX512BW-NEXT: # %bb.2: -; AVX512BW-NEXT: kxorw %k0, %k0, %k2 -; AVX512BW-NEXT: jmp .LBB7_3 -; AVX512BW-NEXT: .LBB7_1: -; AVX512BW-NEXT: vpcmpeqd %ymm2, %ymm0, %k2 -; AVX512BW-NEXT: .LBB7_3: ; AVX512BW-NEXT: korw %k0, %k1, %k0 -; AVX512BW-NEXT: korw %k2, %k0, %k0 +; AVX512BW-NEXT: testb $1, %dil +; AVX512BW-NEXT: je .LBB7_2 +; AVX512BW-NEXT: # %bb.1: +; AVX512BW-NEXT: vpcmpeqd %ymm2, %ymm0, %k1 +; AVX512BW-NEXT: korw %k1, %k0, %k0 +; AVX512BW-NEXT: .LBB7_2: ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/bool-simplify.ll b/llvm/test/CodeGen/X86/bool-simplify.ll --- a/llvm/test/CodeGen/X86/bool-simplify.ll +++ b/llvm/test/CodeGen/X86/bool-simplify.ll @@ -51,10 +51,12 @@ define i16 @rnd16(i16 %arg) nounwind { ; CHECK-LABEL: rnd16: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: rdrandw %cx -; CHECK-NEXT: cmovbl %edi, %eax -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leal (%rdi,%rcx), %eax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %1 = tail call { i16, i32 } @llvm.x86.rdrand.16() nounwind @@ -69,10 +71,12 @@ define i32 @rnd32(i32 %arg) nounwind { ; CHECK-LABEL: rnd32: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: rdrandl %ecx -; CHECK-NEXT: cmovbl %edi, %eax -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leal (%rdi,%rcx), %eax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: retq %1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind %2 = extractvalue { i32, i32 } %1, 0 @@ -86,10 +90,11 @@ define i64 @rnd64(i64 %arg) nounwind { ; CHECK-LABEL: rnd64: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdrandq %rcx -; CHECK-NEXT: cmovbq %rdi, %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leaq (%rdi,%rcx), %rax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: retq %1 = tail call { i64, i32 } @llvm.x86.rdrand.64() nounwind %2 = extractvalue { i64, i32 } %1, 0 @@ -103,10 +108,12 @@ define i16 @seed16(i16 %arg) nounwind { ; CHECK-LABEL: seed16: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: rdseedw %cx -; CHECK-NEXT: cmovbl %edi, %eax -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leal (%rdi,%rcx), %eax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %1 = tail call { i16, i32 } @llvm.x86.rdseed.16() nounwind @@ -121,10 +128,12 @@ define i32 @seed32(i32 %arg) nounwind { ; CHECK-LABEL: seed32: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: rdseedl %ecx -; CHECK-NEXT: cmovbl %edi, %eax -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leal (%rdi,%rcx), %eax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: retq %1 = tail call { i32, i32 } @llvm.x86.rdseed.32() nounwind %2 = extractvalue { i32, i32 } %1, 0 @@ -138,10 +147,11 @@ define i64 @seed64(i64 %arg) nounwind { ; CHECK-LABEL: seed64: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdseedq %rcx -; CHECK-NEXT: cmovbq %rdi, %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: setae %dl +; CHECK-NEXT: leaq (%rdi,%rcx), %rax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: retq %1 = tail call { i64, i32 } @llvm.x86.rdseed.64() nounwind %2 = extractvalue { i64, i32 } %1, 0 diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll --- a/llvm/test/CodeGen/X86/bool-vector.ll +++ b/llvm/test/CodeGen/X86/bool-vector.ll @@ -62,50 +62,46 @@ define i32 @PR15215_good(<4 x i32> %input) { ; X86-LABEL: PR15215_good: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: andl $1, %esi -; X86-NEXT: andl $1, %edx -; X86-NEXT: andl $1, %ecx ; X86-NEXT: andl $1, %eax -; X86-NEXT: leal (%esi,%edx,2), %edx -; X86-NEXT: leal (%edx,%ecx,4), %ecx -; X86-NEXT: leal (%ecx,%eax,8), %eax -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: jne .LBB1_1 +; X86-NEXT: # %bb.2: # %entry +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: jne .LBB1_3 +; X86-NEXT: .LBB1_4: # %entry +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: jne .LBB1_5 +; X86-NEXT: .LBB1_6: # %entry +; X86-NEXT: retl +; X86-NEXT: .LBB1_1: +; X86-NEXT: orl $2, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB1_4 +; X86-NEXT: .LBB1_3: +; X86-NEXT: orl $4, %eax +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: je .LBB1_6 +; X86-NEXT: .LBB1_5: +; X86-NEXT: orl $8, %eax ; X86-NEXT: retl ; ; X64-LABEL: PR15215_good: ; X64: # %bb.0: # %entry -; X64-NEXT: # kill: def $ecx killed $ecx def $rcx -; X64-NEXT: # kill: def $edx killed $edx def $rdx -; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: andl $1, %edi -; X64-NEXT: andl $1, %esi -; X64-NEXT: andl $1, %edx -; X64-NEXT: andl $1, %ecx -; X64-NEXT: leal (%rdi,%rsi,2), %eax -; X64-NEXT: leal (%rax,%rdx,4), %eax -; X64-NEXT: leal (%rax,%rcx,8), %eax +; X64-NEXT: leal 2(%rdi), %eax +; X64-NEXT: testb $1, %sil +; X64-NEXT: cmovel %edi, %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: orl $4, %esi +; X64-NEXT: testb $1, %dl +; X64-NEXT: cmovel %eax, %esi +; X64-NEXT: movl %esi, %eax +; X64-NEXT: orl $8, %eax +; X64-NEXT: testb $1, %cl +; X64-NEXT: cmovel %esi, %eax ; X64-NEXT: retq -; -; SSE2-LABEL: PR15215_good: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: ret{{[l|q]}} -; -; AVX2-LABEL: PR15215_good: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX2-NEXT: vmovmskps %xmm0, %eax -; AVX2-NEXT: ret{{[l|q]}} entry: %0 = trunc <4 x i32> %input to <4 x i1> %1 = extractelement <4 x i1> %0, i32 0 diff --git a/llvm/test/CodeGen/X86/fildll.ll b/llvm/test/CodeGen/X86/fildll.ll --- a/llvm/test/CodeGen/X86/fildll.ll +++ b/llvm/test/CodeGen/X86/fildll.ll @@ -34,9 +34,14 @@ ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ecx, (%esp) -; CHECK-NEXT: shrl $31, %edx +; CHECK-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: jns .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addl $4, %eax +; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: fildll (%esp) -; CHECK-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%edx,4) +; CHECK-NEXT: fadds (%eax) ; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebp, %esp diff --git a/llvm/test/CodeGen/X86/fold-select.ll b/llvm/test/CodeGen/X86/fold-select.ll --- a/llvm/test/CodeGen/X86/fold-select.ll +++ b/llvm/test/CodeGen/X86/fold-select.ll @@ -4,9 +4,16 @@ define <8 x float> @select_and_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, <8 x float> %d) { ; CHECK-LABEL: select_and_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpternlogq $200, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpsllw $15, %xmm2, %xmm4 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpgtw %xmm4, %xmm0, %k0 {%k1} +; CHECK-NEXT: vpand %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 +; CHECK-NEXT: vpmovw2m %xmm0, %k2 +; CHECK-NEXT: kandnb %k2, %k1, %k1 +; CHECK-NEXT: korb %k1, %k0, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm3, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -19,9 +26,13 @@ define <8 x float> @select_and_v8i1_2(i8 %m1, i8 %m2, i8 %m3, <8 x float> %d) { ; CHECK-LABEL: select_and_v8i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: andl %edx, %edi -; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: kmovd %edi, %k0 +; CHECK-NEXT: andl %edx, %esi +; CHECK-NEXT: kmovd %edx, %k1 +; CHECK-NEXT: kandb %k0, %k1, %k1 +; CHECK-NEXT: kmovd %esi, %k2 +; CHECK-NEXT: kandnb %k2, %k0, %k0 +; CHECK-NEXT: korb %k0, %k1, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -38,10 +49,11 @@ define <8 x float> @select_and_v8i1_3(<8 x i16> %m1, <8 x i16> %m2, <8 x i16> %m3, <8 x float> %d) { ; CHECK-LABEL: select_and_v8i1_3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 -; CHECK-NEXT: vpcmpeqw %xmm2, %xmm0, %k1 +; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1 +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: vpcmpeqw %xmm2, %xmm0, %k1 {%k1} +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k1 {%k1} ; CHECK-NEXT: korb %k1, %k0, %k1 -; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1 {%k1} ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm3, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -57,13 +69,15 @@ define <8 x float> @select_or_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, <8 x float> %d) { ; CHECK-LABEL: select_or_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsllw $15, %xmm2, %xmm2 -; CHECK-NEXT: vpmovw2m %xmm2, %k0 -; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 -; CHECK-NEXT: vpmovw2m %xmm1, %k1 +; CHECK-NEXT: vpsllw $15, %xmm2, %xmm4 +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 +; CHECK-NEXT: vpmovw2m %xmm0, %k1 +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpgtw %xmm4, %xmm0, %k0 {%k1} +; CHECK-NEXT: vpor %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 ; CHECK-NEXT: vpmovw2m %xmm0, %k2 -; CHECK-NEXT: kandnb %k1, %k2, %k1 +; CHECK-NEXT: kandnb %k2, %k1, %k1 ; CHECK-NEXT: korb %k1, %k0, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm3, %ymm0 {%k1} @@ -78,10 +92,12 @@ ; CHECK-LABEL: select_or_v8i1_2: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kmovd %edx, %k2 -; CHECK-NEXT: kandnb %k1, %k0, %k0 -; CHECK-NEXT: korb %k0, %k2, %k1 +; CHECK-NEXT: orl %edx, %esi +; CHECK-NEXT: kmovd %edx, %k1 +; CHECK-NEXT: kandb %k0, %k1, %k1 +; CHECK-NEXT: kmovd %esi, %k2 +; CHECK-NEXT: kandnb %k2, %k0, %k0 +; CHECK-NEXT: korb %k0, %k1, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 @@ -98,10 +114,12 @@ define <8 x float> @select_or_v8i1_3(<8 x i16> %m1, <8 x i16> %m2, <8 x i16> %m3, <8 x float> %d) { ; CHECK-LABEL: select_or_v8i1_3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k0 -; CHECK-NEXT: vpcmpeqw %xmm2, %xmm0, %k1 {%k1} -; CHECK-NEXT: korb %k1, %k0, %k1 +; CHECK-NEXT: vpcmpeqw %xmm2, %xmm0, %k0 +; CHECK-NEXT: vpcmpeqw %xmm2, %xmm1, %k1 +; CHECK-NEXT: korb %k0, %k1, %k2 +; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k2} +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 {%k1} +; CHECK-NEXT: korb %k0, %k1, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; CHECK-NEXT: vmovaps %ymm3, %ymm0 {%k1} ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fp-cvt.ll b/llvm/test/CodeGen/X86/fp-cvt.ll --- a/llvm/test/CodeGen/X86/fp-cvt.ll +++ b/llvm/test/CodeGen/X86/fp-cvt.ll @@ -444,20 +444,21 @@ ; X86-NEXT: subl $16, %esp ; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NEXT: fucom %st(1) +; X86-NEXT: fld %st(1) +; X86-NEXT: fsub %st(1), %st +; X86-NEXT: fxch %st(1) +; X86-NEXT: fucomp %st(2) ; X86-NEXT: fnstsw %ax -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf -; X86-NEXT: setbe %al -; X86-NEXT: fldz ; X86-NEXT: jbe .LBB10_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB10_2: -; X86-NEXT: fstp %st(0) -; X86-NEXT: fsubrp %st, %st(1) +; X86-NEXT: fstp %st(1) +; X86-NEXT: setbe %al ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -465,7 +466,7 @@ ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movb %al, %dl +; X86-NEXT: movzbl %al, %edx ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -524,20 +525,21 @@ ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: fldt (%eax) ; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-NEXT: fucom %st(1) +; X86-NEXT: fld %st(1) +; X86-NEXT: fsub %st(1), %st +; X86-NEXT: fxch %st(1) +; X86-NEXT: fucomp %st(2) ; X86-NEXT: fnstsw %ax -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: # kill: def $ah killed $ah killed $ax ; X86-NEXT: sahf -; X86-NEXT: setbe %al -; X86-NEXT: fldz ; X86-NEXT: jbe .LBB11_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: fstp %st(1) +; X86-NEXT: fstp %st(0) ; X86-NEXT: fldz +; X86-NEXT: fxch %st(1) ; X86-NEXT: .LBB11_2: -; X86-NEXT: fstp %st(0) -; X86-NEXT: fsubrp %st, %st(1) +; X86-NEXT: fstp %st(1) +; X86-NEXT: setbe %al ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -545,7 +547,7 @@ ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movb %al, %dl +; X86-NEXT: movzbl %al, %edx ; X86-NEXT: shll $31, %edx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -823,9 +825,14 @@ ; X86-NEXT: movl 12(%ebp), %ecx ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: shrl $31, %ecx +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: jns .LBB22_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: addl $4, %eax +; X86-NEXT: .LBB22_2: ; X86-NEXT: fildll (%esp) -; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X86-NEXT: fadds (%eax) ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -852,12 +859,17 @@ ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl 4(%eax), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %ecx, (%esp) -; X86-NEXT: shrl $31, %eax +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: jns .LBB23_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: addl $4, %eax +; X86-NEXT: .LBB23_2: ; X86-NEXT: fildll (%esp) -; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-NEXT: fadds (%eax) ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2479,9 +2479,12 @@ ; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: leal 4(%eax), %edx +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: cmovnsl %eax, %edx ; X87-NEXT: fildll (%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%edx) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: wait @@ -2493,12 +2496,14 @@ ; X86-SSE: # %bb.0: # %entry ; X86-SSE-NEXT: subl $28, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 32 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: shrl $31, %eax +; X86-SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-SSE-NEXT: leal 4(%eax), %ecx +; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: cmovnsl %eax, %ecx ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-SSE-NEXT: fadds (%ecx) ; X86-SSE-NEXT: fstpl {{[0-9]+}}(%esp) ; X86-SSE-NEXT: wait ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -2701,9 +2706,12 @@ ; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: leal 4(%eax), %edx +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: cmovnsl %eax, %edx ; X87-NEXT: fildll {{[0-9]+}}(%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%edx) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: wait @@ -2715,12 +2723,14 @@ ; X86-SSE: # %bb.0: # %entry ; X86-SSE-NEXT: subl $20, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 24 -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: shrl $31, %eax +; X86-SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-SSE-NEXT: leal 4(%eax), %ecx +; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: cmovnsl %eax, %ecx ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-SSE-NEXT: fadds (%ecx) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: wait ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -588,12 +588,14 @@ ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp ; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $16, %esp -; SSE-X86-NEXT: movl 12(%ebp), %eax ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; SSE-X86-NEXT: shrl $31, %eax +; SSE-X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE-X86-NEXT: leal 4(%eax), %ecx +; SSE-X86-NEXT: cmpl $0, 12(%ebp) +; SSE-X86-NEXT: cmovnsl %eax, %ecx ; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-X86-NEXT: fadds (%ecx) ; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) ; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -630,12 +632,14 @@ ; AVX-X86-NEXT: .cfi_def_cfa_register %ebp ; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $16, %esp -; AVX-X86-NEXT: movl 12(%ebp), %eax ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX-X86-NEXT: shrl $31, %eax +; AVX-X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-X86-NEXT: leal 4(%eax), %ecx +; AVX-X86-NEXT: cmpl $0, 12(%ebp) +; AVX-X86-NEXT: cmovnsl %eax, %ecx ; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-X86-NEXT: fadds (%ecx) ; AVX-X86-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-X86-NEXT: wait ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -681,9 +685,14 @@ ; X87-NEXT: movl 12(%ebp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB9_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB9_2: ; X87-NEXT: fildll {{[0-9]+}}(%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%eax) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: wait @@ -1277,12 +1286,14 @@ ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp ; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $24, %esp -; SSE-X86-NEXT: movl 12(%ebp), %eax ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; SSE-X86-NEXT: shrl $31, %eax +; SSE-X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE-X86-NEXT: leal 4(%eax), %ecx +; SSE-X86-NEXT: cmpl $0, 12(%ebp) +; SSE-X86-NEXT: cmovnsl %eax, %ecx ; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-X86-NEXT: fadds (%ecx) ; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -1319,12 +1330,14 @@ ; AVX-X86-NEXT: .cfi_def_cfa_register %ebp ; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $24, %esp -; AVX-X86-NEXT: movl 12(%ebp), %eax ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-X86-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX-X86-NEXT: shrl $31, %eax +; AVX-X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-X86-NEXT: leal 4(%eax), %ecx +; AVX-X86-NEXT: cmpl $0, 12(%ebp) +; AVX-X86-NEXT: cmovnsl %eax, %ecx ; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-X86-NEXT: fadds (%ecx) ; AVX-X86-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-X86-NEXT: wait ; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -1370,9 +1383,14 @@ ; X87-NEXT: movl 12(%ebp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB18_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB18_2: ; X87-NEXT: fildll (%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%eax) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: wait diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -134,11 +134,11 @@ ; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE-NEXT: callq __lttf2@PLT -; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; SSE-NEXT: leaq 16(%rcx), %rdx ; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: sets %cl -; SSE-NEXT: shlq $4, %rcx -; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0 +; SSE-NEXT: cmovnsq %rcx, %rdx +; SSE-NEXT: movaps (%rdx), %xmm0 ; SSE-NEXT: popq %rax ; SSE-NEXT: retq ; @@ -148,11 +148,11 @@ ; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; AVX-NEXT: callq __lttf2@PLT -; AVX-NEXT: xorl %ecx, %ecx +; AVX-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; AVX-NEXT: leaq 16(%rcx), %rdx ; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: sets %cl -; AVX-NEXT: shlq $4, %rcx -; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0 +; AVX-NEXT: cmovnsq %rcx, %rdx +; AVX-NEXT: vmovaps (%rdx), %xmm0 ; AVX-NEXT: popq %rax ; AVX-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -903,9 +903,14 @@ ; X86-NEXT: movl 12(%ebp), %ecx ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, (%esp) -; X86-NEXT: shrl $31, %ecx +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: jns .LBB28_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: addl $4, %eax +; X86-NEXT: .LBB28_2: ; X86-NEXT: fildll (%esp) -; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X86-NEXT: fadds (%eax) ; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll --- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll @@ -408,31 +408,30 @@ ; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB6_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB6_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: shll $31, %esi +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) @@ -440,10 +439,10 @@ ; X86-X87-NEXT: xorl %edx, %edx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %esi +; X86-X87-NEXT: movl $0, %ecx ; X86-X87-NEXT: jb .LBB6_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %esi +; X86-X87-NEXT: movl %esi, %ecx ; X86-X87-NEXT: .LBB6_4: ; X86-X87-NEXT: jb .LBB6_6 ; X86-X87-NEXT: # %bb.5: @@ -463,7 +462,7 @@ ; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF ; X86-X87-NEXT: ja .LBB6_10 ; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %esi, %edx +; X86-X87-NEXT: movl %ecx, %edx ; X86-X87-NEXT: .LBB6_10: ; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi @@ -475,15 +474,13 @@ ; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movaps %xmm0, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm0, %xmm2 -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: jbe .LBB6_2 +; X86-SSE-NEXT: ja .LBB6_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorps %xmm2, %xmm2 +; X86-SSE-NEXT: subss %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB6_2: -; X86-SSE-NEXT: movaps %xmm0, %xmm3 -; X86-SSE-NEXT: subss %xmm2, %xmm3 -; X86-SSE-NEXT: movss %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setbe %cl ; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -494,6 +491,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorps %xmm1, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %esi ; X86-SSE-NEXT: jb .LBB6_4 @@ -530,51 +528,49 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind { ; X86-X87-LABEL: test_unsigned_i64_f32: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $20, %esp +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: flds {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB7_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB7_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %edx +; X86-X87-NEXT: shll $31, %edx +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %esi ; X86-X87-NEXT: jb .LBB7_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %edi +; X86-X87-NEXT: movl %edx, %esi ; X86-X87-NEXT: .LBB7_4: ; X86-X87-NEXT: jb .LBB7_6 ; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-X87-NEXT: .LBB7_6: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -586,12 +582,11 @@ ; X86-X87-NEXT: movl $-1, %edx ; X86-X87-NEXT: ja .LBB7_8 ; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: movl %esi, %edx ; X86-X87-NEXT: .LBB7_8: -; X86-X87-NEXT: addl $20, %esp +; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: retl ; ; X86-SSE-LABEL: test_unsigned_i64_f32: @@ -599,15 +594,13 @@ ; X86-SSE-NEXT: subl $20, %esp ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movaps %xmm0, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm0, %xmm2 -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: jbe .LBB7_2 +; X86-SSE-NEXT: ja .LBB7_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorps %xmm2, %xmm2 +; X86-SSE-NEXT: subss %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB7_2: -; X86-SSE-NEXT: movaps %xmm0, %xmm3 -; X86-SSE-NEXT: subss %xmm2, %xmm3 -; X86-SSE-NEXT: movss %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setbe %cl ; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -618,6 +611,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorps %xmm1, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %eax ; X86-SSE-NEXT: jb .LBB7_4 @@ -1336,31 +1330,30 @@ ; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB16_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB16_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: shll $31, %esi +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) @@ -1368,10 +1361,10 @@ ; X86-X87-NEXT: xorl %edx, %edx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %esi +; X86-X87-NEXT: movl $0, %ecx ; X86-X87-NEXT: jb .LBB16_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %esi +; X86-X87-NEXT: movl %esi, %ecx ; X86-X87-NEXT: .LBB16_4: ; X86-X87-NEXT: jb .LBB16_6 ; X86-X87-NEXT: # %bb.5: @@ -1391,7 +1384,7 @@ ; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF ; X86-X87-NEXT: ja .LBB16_10 ; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %esi, %edx +; X86-X87-NEXT: movl %ecx, %edx ; X86-X87-NEXT: .LBB16_10: ; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi @@ -1403,15 +1396,13 @@ ; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X86-SSE-NEXT: movapd %xmm0, %xmm1 ; X86-SSE-NEXT: ucomisd %xmm0, %xmm2 -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: jbe .LBB16_2 +; X86-SSE-NEXT: ja .LBB16_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorpd %xmm2, %xmm2 +; X86-SSE-NEXT: subsd %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB16_2: -; X86-SSE-NEXT: movapd %xmm0, %xmm3 -; X86-SSE-NEXT: subsd %xmm2, %xmm3 -; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movsd %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setbe %cl ; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -1422,6 +1413,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %esi ; X86-SSE-NEXT: jb .LBB16_4 @@ -1454,51 +1446,49 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind { ; X86-X87-LABEL: test_unsigned_i64_f64: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $20, %esp +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fldl {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB17_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB17_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %edx +; X86-X87-NEXT: shll $31, %edx +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %esi ; X86-X87-NEXT: jb .LBB17_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %edi +; X86-X87-NEXT: movl %edx, %esi ; X86-X87-NEXT: .LBB17_4: ; X86-X87-NEXT: jb .LBB17_6 ; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-X87-NEXT: .LBB17_6: ; X86-X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -1510,12 +1500,11 @@ ; X86-X87-NEXT: movl $-1, %edx ; X86-X87-NEXT: ja .LBB17_8 ; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: movl %esi, %edx ; X86-X87-NEXT: .LBB17_8: -; X86-X87-NEXT: addl $20, %esp +; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: retl ; ; X86-SSE-LABEL: test_unsigned_i64_f64: @@ -1523,15 +1512,13 @@ ; X86-SSE-NEXT: subl $20, %esp ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X86-SSE-NEXT: movapd %xmm0, %xmm1 ; X86-SSE-NEXT: ucomisd %xmm0, %xmm2 -; X86-SSE-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE-NEXT: jbe .LBB17_2 +; X86-SSE-NEXT: ja .LBB17_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorpd %xmm2, %xmm2 +; X86-SSE-NEXT: subsd %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB17_2: -; X86-SSE-NEXT: movapd %xmm0, %xmm3 -; X86-SSE-NEXT: subsd %xmm2, %xmm3 -; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movsd %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setbe %cl ; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -1542,6 +1529,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ; X86-SSE-NEXT: ucomisd %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %eax ; X86-SSE-NEXT: jb .LBB17_4 @@ -2384,42 +2372,44 @@ ; X86-X87-NEXT: movl %eax, (%esp) ; X86-X87-NEXT: calll __gnu_h2f_ieee ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(2) ; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setae %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jae .LBB26_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(2) -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fxch %st(2) +; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB26_2: -; X86-X87-NEXT: fxch %st(2) -; X86-X87-NEXT: fsubr %st(1), %st +; X86-X87-NEXT: setae %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: shll $31, %esi +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: fldz +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax ; X86-X87-NEXT: xorl %edx, %edx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %esi +; X86-X87-NEXT: movl $0, %ecx ; X86-X87-NEXT: jb .LBB26_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %esi +; X86-X87-NEXT: movl %esi, %ecx ; X86-X87-NEXT: .LBB26_4: ; X86-X87-NEXT: jb .LBB26_6 ; X86-X87-NEXT: # %bb.5: @@ -2439,7 +2429,7 @@ ; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF ; X86-X87-NEXT: ja .LBB26_10 ; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %esi, %edx +; X86-X87-NEXT: movl %ecx, %edx ; X86-X87-NEXT: .LBB26_10: ; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi @@ -2456,15 +2446,13 @@ ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movaps %xmm0, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm2, %xmm0 -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: jae .LBB26_2 +; X86-SSE-NEXT: jb .LBB26_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorps %xmm2, %xmm2 +; X86-SSE-NEXT: subss %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB26_2: -; X86-SSE-NEXT: movaps %xmm0, %xmm3 -; X86-SSE-NEXT: subss %xmm2, %xmm3 -; X86-SSE-NEXT: movss %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setae %cl ; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -2475,6 +2463,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %eax, %eax +; X86-SSE-NEXT: xorps %xmm1, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %esi ; X86-SSE-NEXT: jb .LBB26_4 @@ -2521,53 +2510,54 @@ define i64 @test_unsigned_i64_f16(half %f) nounwind { ; X86-X87-LABEL: test_unsigned_i64_f16: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $20, %esp +; X86-X87-NEXT: subl $24, %esp ; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-X87-NEXT: movl %eax, (%esp) ; X86-X87-NEXT: calll __gnu_h2f_ieee ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(2) ; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setae %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jae .LBB27_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(2) -; X86-X87-NEXT: fld %st(1) -; X86-X87-NEXT: fxch %st(2) +; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fld %st(0) +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB27_2: -; X86-X87-NEXT: fxch %st(2) -; X86-X87-NEXT: fsubr %st(1), %st +; X86-X87-NEXT: setae %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %edx +; X86-X87-NEXT: shll $31, %edx +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: fldz +; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %esi ; X86-X87-NEXT: jb .LBB27_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %edi +; X86-X87-NEXT: movl %edx, %esi ; X86-X87-NEXT: .LBB27_4: ; X86-X87-NEXT: jb .LBB27_6 ; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-X87-NEXT: .LBB27_6: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -2579,12 +2569,11 @@ ; X86-X87-NEXT: movl $-1, %edx ; X86-X87-NEXT: ja .LBB27_8 ; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: movl %esi, %edx ; X86-X87-NEXT: .LBB27_8: -; X86-X87-NEXT: addl $20, %esp +; X86-X87-NEXT: addl $24, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: retl ; ; X86-SSE-LABEL: test_unsigned_i64_f16: @@ -2597,15 +2586,13 @@ ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movaps %xmm0, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm2, %xmm0 -; X86-SSE-NEXT: xorps %xmm1, %xmm1 -; X86-SSE-NEXT: jae .LBB27_2 +; X86-SSE-NEXT: jb .LBB27_2 ; X86-SSE-NEXT: # %bb.1: -; X86-SSE-NEXT: xorps %xmm2, %xmm2 +; X86-SSE-NEXT: subss %xmm2, %xmm1 ; X86-SSE-NEXT: .LBB27_2: -; X86-SSE-NEXT: movaps %xmm0, %xmm3 -; X86-SSE-NEXT: subss %xmm2, %xmm3 -; X86-SSE-NEXT: movss %xmm3, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movss %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: setae %cl ; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -2616,6 +2603,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: xorps %xmm1, %xmm1 ; X86-SSE-NEXT: ucomiss %xmm1, %xmm0 ; X86-SSE-NEXT: movl $0, %eax ; X86-SSE-NEXT: jb .LBB27_4 @@ -3575,31 +3563,30 @@ ; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB36_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB36_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %esi +; X86-X87-NEXT: shll $31, %esi +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) @@ -3607,10 +3594,10 @@ ; X86-X87-NEXT: xorl %edx, %edx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %esi +; X86-X87-NEXT: movl $0, %ecx ; X86-X87-NEXT: jb .LBB36_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %esi +; X86-X87-NEXT: movl %esi, %ecx ; X86-X87-NEXT: .LBB36_4: ; X86-X87-NEXT: jb .LBB36_6 ; X86-X87-NEXT: # %bb.5: @@ -3630,7 +3617,7 @@ ; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF ; X86-X87-NEXT: ja .LBB36_10 ; X86-X87-NEXT: # %bb.9: -; X86-X87-NEXT: movl %esi, %edx +; X86-X87-NEXT: movl %ecx, %edx ; X86-X87-NEXT: .LBB36_10: ; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi @@ -3642,15 +3629,15 @@ ; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE-NEXT: fld %st(1) +; X86-SSE-NEXT: fsub %st(1), %st ; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: setbe %cl -; X86-SSE-NEXT: fldz -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fcmovbe %st(2), %st -; X86-SSE-NEXT: fstp %st(2) ; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fsubr %st(2), %st +; X86-SSE-NEXT: fucompi %st(2), %st +; X86-SSE-NEXT: fld %st(1) +; X86-SSE-NEXT: fcmovbe %st(1), %st +; X86-SSE-NEXT: fstp %st(1) +; X86-SSE-NEXT: setbe %cl ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-SSE-NEXT: orl $3072, %edx # imm = 0xC00 @@ -3659,6 +3646,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %esi, %esi +; X86-SSE-NEXT: fldz ; X86-SSE-NEXT: fxch %st(1) ; X86-SSE-NEXT: fucomi %st(1), %st ; X86-SSE-NEXT: fstp %st(1) @@ -3724,51 +3712,49 @@ define i64 @test_unsigned_i64_f80(x86_fp80 %f) nounwind { ; X86-X87-LABEL: test_unsigned_i64_f80: ; X86-X87: # %bb.0: -; X86-X87-NEXT: pushl %edi ; X86-X87-NEXT: pushl %esi -; X86-X87-NEXT: subl $20, %esp +; X86-X87-NEXT: subl $16, %esp ; X86-X87-NEXT: fldt {{[0-9]+}}(%esp) ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X86-X87-NEXT: fucom %st(1) +; X86-X87-NEXT: fld %st(1) +; X86-X87-NEXT: fsub %st(1), %st +; X86-X87-NEXT: fxch %st(1) +; X86-X87-NEXT: fucomp %st(2) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: setbe %al -; X86-X87-NEXT: fldz ; X86-X87-NEXT: jbe .LBB37_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: fstp %st(1) +; X86-X87-NEXT: fstp %st(0) ; X86-X87-NEXT: fld %st(0) -; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: .LBB37_2: -; X86-X87-NEXT: fxch %st(1) -; X86-X87-NEXT: fsubr %st(2), %st +; X86-X87-NEXT: setbe %al ; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %edx -; X86-X87-NEXT: orl $3072, %edx # imm = 0xC00 -; X86-X87-NEXT: movw %dx, {{[0-9]+}}(%esp) +; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: orl $3072, %ecx # imm = 0xC00 +; X86-X87-NEXT: movw %cx, {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-X87-NEXT: movb %al, %cl -; X86-X87-NEXT: shll $31, %ecx -; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movzbl %al, %edx +; X86-X87-NEXT: shll $31, %edx +; X86-X87-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) ; X86-X87-NEXT: fucom %st(1) ; X86-X87-NEXT: fstp %st(1) ; X86-X87-NEXT: fnstsw %ax -; X86-X87-NEXT: xorl %esi, %esi +; X86-X87-NEXT: xorl %ecx, %ecx ; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax ; X86-X87-NEXT: sahf -; X86-X87-NEXT: movl $0, %edi +; X86-X87-NEXT: movl $0, %esi ; X86-X87-NEXT: jb .LBB37_4 ; X86-X87-NEXT: # %bb.3: -; X86-X87-NEXT: movl %ecx, %edi +; X86-X87-NEXT: movl %edx, %esi ; X86-X87-NEXT: .LBB37_4: ; X86-X87-NEXT: jb .LBB37_6 ; X86-X87-NEXT: # %bb.5: -; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-X87-NEXT: .LBB37_6: ; X86-X87-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -3780,12 +3766,11 @@ ; X86-X87-NEXT: movl $-1, %edx ; X86-X87-NEXT: ja .LBB37_8 ; X86-X87-NEXT: # %bb.7: -; X86-X87-NEXT: movl %esi, %eax -; X86-X87-NEXT: movl %edi, %edx +; X86-X87-NEXT: movl %ecx, %eax +; X86-X87-NEXT: movl %esi, %edx ; X86-X87-NEXT: .LBB37_8: -; X86-X87-NEXT: addl $20, %esp +; X86-X87-NEXT: addl $16, %esp ; X86-X87-NEXT: popl %esi -; X86-X87-NEXT: popl %edi ; X86-X87-NEXT: retl ; ; X86-SSE-LABEL: test_unsigned_i64_f80: @@ -3794,15 +3779,15 @@ ; X86-SSE-NEXT: subl $16, %esp ; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp) ; X86-SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE-NEXT: fld %st(1) +; X86-SSE-NEXT: fsub %st(1), %st ; X86-SSE-NEXT: xorl %ecx, %ecx -; X86-SSE-NEXT: fucomi %st(1), %st -; X86-SSE-NEXT: setbe %bl -; X86-SSE-NEXT: fldz -; X86-SSE-NEXT: fld %st(0) -; X86-SSE-NEXT: fcmovbe %st(2), %st -; X86-SSE-NEXT: fstp %st(2) ; X86-SSE-NEXT: fxch %st(1) -; X86-SSE-NEXT: fsubr %st(2), %st +; X86-SSE-NEXT: fucompi %st(2), %st +; X86-SSE-NEXT: fld %st(1) +; X86-SSE-NEXT: fcmovbe %st(1), %st +; X86-SSE-NEXT: fstp %st(1) +; X86-SSE-NEXT: setbe %bl ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00 @@ -3811,6 +3796,7 @@ ; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: xorl %edx, %edx +; X86-SSE-NEXT: fldz ; X86-SSE-NEXT: fxch %st(1) ; X86-SSE-NEXT: fucomi %st(1), %st ; X86-SSE-NEXT: fstp %st(1) diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -336,11 +336,10 @@ ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-I686-NEXT: jae .LBB9_2 +; CHECK-I686-NEXT: jb .LBB9_2 ; CHECK-I686-NEXT: # %bb.1: -; CHECK-I686-NEXT: xorps %xmm1, %xmm1 -; CHECK-I686-NEXT: .LBB9_2: ; CHECK-I686-NEXT: subss %xmm1, %xmm0 +; CHECK-I686-NEXT: .LBB9_2: ; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; CHECK-I686-NEXT: setae %al ; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp) @@ -411,12 +410,14 @@ ; CHECK-I686-NEXT: pushl %esi ; CHECK-I686-NEXT: subl $24, %esp ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-I686-NEXT: movq %xmm0, {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: shrl $31, %eax +; CHECK-I686-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; CHECK-I686-NEXT: leal 4(%eax), %ecx +; CHECK-I686-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: cmovnsl %eax, %ecx ; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; CHECK-I686-NEXT: fadds (%ecx) ; CHECK-I686-NEXT: fstps (%esp) ; CHECK-I686-NEXT: calll __truncsfhf2 ; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll --- a/llvm/test/CodeGen/X86/jump_sign.ll +++ b/llvm/test/CodeGen/X86/jump_sign.ll @@ -308,12 +308,11 @@ define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) { ; CHECK-LABEL: func_q: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: sbbl %ecx, %ecx -; CHECK-NEXT: negl %eax -; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: notl %eax +; CHECK-NEXT: cmovbel %ecx, %eax ; CHECK-NEXT: retl %t1 = icmp ult i32 %a0, %a1 %t2 = sub i32 %a1, %a0 diff --git a/llvm/test/CodeGen/X86/lea-opt2.ll b/llvm/test/CodeGen/X86/lea-opt2.ll --- a/llvm/test/CodeGen/X86/lea-opt2.ll +++ b/llvm/test/CodeGen/X86/lea-opt2.ll @@ -189,11 +189,9 @@ ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: leaq (%rsi,%rdi), %rax -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: leaq 4096(%rax), %rcx ; CHECK-NEXT: testl $4095, %eax # imm = 0xFFF -; CHECK-NEXT: setne %cl -; CHECK-NEXT: shlq $12, %rcx -; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: cmoveq %rax, %rcx ; CHECK-NEXT: andq $-4096, %rcx # imm = 0xF000 ; CHECK-NEXT: addq %rcx, %rdi ; CHECK-NEXT: jmp bar@PLT # TAILCALL @@ -212,21 +210,20 @@ define void @test10() { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: movzwl (%rax), %ecx -; CHECK-NEXT: leal (%rcx,%rcx,2), %esi -; CHECK-NEXT: movl %ecx, %edi -; CHECK-NEXT: subl %ecx, %edi -; CHECK-NEXT: subl %ecx, %edi +; CHECK-NEXT: movl (%rax), %ecx +; CHECK-NEXT: movzwl (%rax), %eax +; CHECK-NEXT: leal (%rax,%rax), %edx +; CHECK-NEXT: leal (%rax,%rax,2), %esi +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: subl %edx, %eax ; CHECK-NEXT: negl %esi -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: cmpl $4, %eax -; CHECK-NEXT: movl %edi, (%rax) -; CHECK-NEXT: movl %esi, (%rax) -; CHECK-NEXT: cmovnel %eax, %ecx -; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: sarl %cl, %esi +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: sarl %cl, %edx +; CHECK-NEXT: cmpl $4, %ecx +; CHECK-NEXT: movl %eax, (%rax) ; CHECK-NEXT: movl %esi, (%rax) +; CHECK-NEXT: cmovel %esi, %edx +; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq entry: %tmp = load i32, ptr undef, align 4 diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll --- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll +++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll @@ -34,14 +34,13 @@ ; CHECK-NEXT: addss %xmm1, %xmm0 ; CHECK-NEXT: addss %xmm2, %xmm0 ; CHECK-NEXT: movss %xmm0, (%rax) -; CHECK-NEXT: testl %ebx, %ebx -; CHECK-NEXT: jne .LBB0_5 -; CHECK-NEXT: # %bb.4: # %if.end -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: .LBB0_5: # %if.end ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: addss %xmm0, %xmm0 +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: # %bb.4: ; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: .LBB0_5: # %if.end ; CHECK-NEXT: callq bar@PLT ; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 16 diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -14,39 +14,40 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; X64-LABEL: scalar_i32_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %esi, %edi -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: cmovgl %esi, %ecx -; X64-NEXT: cmovgl %edi, %esi -; X64-NEXT: subl %ecx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmovgl %esi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmovgl %edi, %ecx +; X64-NEXT: subl %eax, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovlel %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_signed_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB0_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB0_3 -; X86-NEXT: .LBB0_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB0_3: +; X86-NEXT: jg .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB0_2: ; X86-NEXT: subl %esi, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jle .LBB0_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB0_4: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -64,39 +65,40 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; X64-LABEL: scalar_i32_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %esi, %edi -; X64-NEXT: setbe %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: cmoval %esi, %ecx -; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: subl %ecx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmoval %esi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmoval %edi, %ecx +; X64-NEXT: subl %eax, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpl %esi, %edi +; X64-NEXT: cmovbel %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_unsigned_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setbe %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: ja .LBB1_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB1_3 -; X86-NEXT: .LBB1_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB1_3: +; X86-NEXT: ja .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB1_2: ; X86-NEXT: subl %esi, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jbe .LBB1_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB1_4: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -117,40 +119,41 @@ ; X64-LABEL: scalar_i32_signed_mem_reg: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %esi, %ecx -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: cmovgl %esi, %edx -; X64-NEXT: cmovgl %ecx, %esi -; X64-NEXT: subl %edx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: cmovgl %esi, %eax +; X64-NEXT: movl %esi, %edx +; X64-NEXT: cmovgl %ecx, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: shrl %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpl %esi, %ecx +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addl %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_signed_mem_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB2_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB2_3 -; X86-NEXT: .LBB2_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %ecx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB2_3: +; X86-NEXT: jg .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB2_2: ; X86-NEXT: subl %esi, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jle .LBB2_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB2_4: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -169,17 +172,18 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i32_signed_reg_mem: ; X64: # %bb.0: -; X64-NEXT: movl (%rsi), %eax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpl %eax, %edi -; X64-NEXT: setle %cl -; X64-NEXT: leal -1(%rcx,%rcx), %ecx -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: subl %edx, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: movl (%rsi), %ecx +; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: cmovgl %edi, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: shrl %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpl %ecx, %edi +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq ; @@ -188,22 +192,22 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB3_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB3_3 -; X86-NEXT: .LBB3_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB3_3: +; X86-NEXT: jg .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB3_2: ; X86-NEXT: subl %esi, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jle .LBB3_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB3_4: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -223,17 +227,18 @@ ; X64-LABEL: scalar_i32_signed_mem_mem: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpl %eax, %ecx -; X64-NEXT: setle %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: subl %esi, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %edx, %eax +; X64-NEXT: movl (%rsi), %edx +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: cmovgl %edx, %eax +; X64-NEXT: movl %edx, %esi +; X64-NEXT: cmovgl %ecx, %esi +; X64-NEXT: subl %eax, %esi +; X64-NEXT: shrl %esi +; X64-NEXT: movl %esi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpl %edx, %ecx +; X64-NEXT: cmovlel %esi, %eax ; X64-NEXT: addl %ecx, %eax ; X64-NEXT: retq ; @@ -243,22 +248,22 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB4_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB4_3 -; X86-NEXT: .LBB4_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB4_3: +; X86-NEXT: jg .LBB4_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB4_2: ; X86-NEXT: subl %esi, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: jle .LBB4_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB4_4: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -284,16 +289,17 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; X64-LABEL: scalar_i64_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: setle %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: cmovgq %rsi, %rcx -; X64-NEXT: cmovgq %rdi, %rsi -; X64-NEXT: subq %rcx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cmovgq %rsi, %rax +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: cmovgq %rdi, %rcx +; X64-NEXT: subq %rax, %rcx +; X64-NEXT: shrq %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovleq %rcx, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: retq ; @@ -303,41 +309,41 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: cmpl %esi, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: setl %cl ; X86-NEXT: jl .LBB5_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB5_3 ; X86-NEXT: .LBB5_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: .LBB5_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: shrdl $1, %edi, %ebx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: testb %cl, %cl +; X86-NEXT: jne .LBB5_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB5_5: +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -357,16 +363,17 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; X64-LABEL: scalar_i64_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: setbe %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: cmovaq %rsi, %rcx -; X64-NEXT: cmovaq %rdi, %rsi -; X64-NEXT: subq %rcx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cmovaq %rsi, %rax +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: cmovaq %rdi, %rcx +; X64-NEXT: subq %rax, %rcx +; X64-NEXT: shrq %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmpq %rsi, %rdi +; X64-NEXT: cmovbeq %rcx, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: retq ; @@ -376,42 +383,41 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setb %dl -; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: testb %dl, %dl -; X86-NEXT: jne .LBB6_1 +; X86-NEXT: cmpl %esi, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: setb %cl +; X86-NEXT: jb .LBB6_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB6_3 ; X86-NEXT: .LBB6_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: .LBB6_3: -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: shrdl $1, %edi, %ebx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: testb %cl, %cl +; X86-NEXT: jne .LBB6_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB6_5: +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -434,66 +440,66 @@ ; X64-LABEL: scalar_i64_signed_mem_reg: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rsi, %rcx -; X64-NEXT: setle %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: cmovgq %rsi, %rdx -; X64-NEXT: cmovgq %rcx, %rsi -; X64-NEXT: subq %rdx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: cmovgq %rsi, %rax +; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: cmovgq %rcx, %rdx +; X64-NEXT: subq %rax, %rdx +; X64-NEXT: shrq %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmpq %rsi, %rcx +; X64-NEXT: cmovleq %rdx, %rax ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i64_signed_mem_reg: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: cmpl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %esi +; X86-NEXT: movl 4(%eax), %ecx +; X86-NEXT: cmpl %esi, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ecx, %eax +; X86-NEXT: setl {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: jl .LBB7_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB7_3 ; X86-NEXT: .LBB7_1: -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: .LBB7_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %edx, %eax -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: shrdl $1, %edi, %ebx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: jne .LBB7_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB7_5: ; X86-NEXT: addl %esi, %eax ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl %a1 = load i64, ptr %a1_addr %t3 = icmp sgt i64 %a1, %a2 ; signed @@ -510,17 +516,18 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i64_signed_reg_mem: ; X64: # %bb.0: -; X64-NEXT: movq (%rsi), %rax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpq %rax, %rdi -; X64-NEXT: setle %cl -; X64-NEXT: leaq -1(%rcx,%rcx), %rcx -; X64-NEXT: movq %rdi, %rdx -; X64-NEXT: cmovgq %rax, %rdx -; X64-NEXT: cmovgq %rdi, %rax -; X64-NEXT: subq %rdx, %rax -; X64-NEXT: shrq %rax -; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: movq (%rsi), %rcx +; X64-NEXT: cmpq %rcx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: cmovgq %rcx, %rax +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: cmovgq %rdi, %rdx +; X64-NEXT: subq %rax, %rdx +; X64-NEXT: shrq %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmpq %rcx, %rdi +; X64-NEXT: cmovleq %rdx, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: retq ; @@ -530,42 +537,42 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %eax -; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %ebx +; X86-NEXT: movl 4(%eax), %edi +; X86-NEXT: cmpl %esi, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: setl %cl ; X86-NEXT: jl .LBB8_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB8_3 ; X86-NEXT: .LBB8_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: .LBB8_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: shrdl $1, %edi, %ebx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: testb %cl, %cl +; X86-NEXT: jne .LBB8_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB8_5: +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -587,68 +594,68 @@ ; X64-LABEL: scalar_i64_signed_mem_mem: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rax, %rcx -; X64-NEXT: setle %dl -; X64-NEXT: leaq -1(%rdx,%rdx), %rdx -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: cmovgq %rax, %rsi -; X64-NEXT: cmovgq %rcx, %rax -; X64-NEXT: subq %rsi, %rax -; X64-NEXT: shrq %rax -; X64-NEXT: imulq %rdx, %rax +; X64-NEXT: movq (%rsi), %rdx +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: cmovgq %rdx, %rax +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: cmovgq %rcx, %rsi +; X64-NEXT: subq %rax, %rsi +; X64-NEXT: shrq %rsi +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: cmovleq %rsi, %rax ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i64_signed_mem_mem: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %esi -; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl (%edx), %eax -; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %esi +; X86-NEXT: movl 4(%ecx), %ecx +; X86-NEXT: movl (%eax), %ebx +; X86-NEXT: movl 4(%eax), %edi +; X86-NEXT: cmpl %esi, %ebx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: sbbl %ecx, %eax +; X86-NEXT: setl {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; X86-NEXT: jl .LBB9_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: jmp .LBB9_3 ; X86-NEXT: .LBB9_1: -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, %ebx ; X86-NEXT: .LBB9_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %edx, %eax -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: shrdl $1, %edi, %ebx ; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: jne .LBB9_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %edx +; X86-NEXT: .LBB9_5: ; X86-NEXT: addl %esi, %eax ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl %a1 = load i64, ptr %a1_addr %a2 = load i64, ptr %a2_addr @@ -672,17 +679,18 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; X64-LABEL: scalar_i16_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpw %si, %di -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %ecx ; X64-NEXT: movl %edi, %eax ; X64-NEXT: cmovgl %esi, %eax -; X64-NEXT: cmovgl %edi, %esi -; X64-NEXT: subl %eax, %esi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmovgl %edi, %ecx +; X64-NEXT: subl %eax, %ecx +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpw %si, %di +; X64-NEXT: cmovlel %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -690,24 +698,26 @@ ; X86-LABEL: scalar_i16_signed_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: cmpw %dx, %cx ; X86-NEXT: jg .LBB10_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: jmp .LBB10_3 ; X86-NEXT: .LBB10_1: -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: .LBB10_3: -; X86-NEXT: subl %esi, %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movzwl %si, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: jle .LBB10_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB10_5: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -726,17 +736,18 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; X64-LABEL: scalar_i16_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpw %si, %di -; X64-NEXT: setbe %al -; X64-NEXT: leal -1(%rax,%rax), %ecx ; X64-NEXT: movl %edi, %eax ; X64-NEXT: cmoval %esi, %eax -; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: subl %eax, %esi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmoval %edi, %ecx +; X64-NEXT: subl %eax, %ecx +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpw %si, %di +; X64-NEXT: cmovbel %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -744,24 +755,26 @@ ; X86-LABEL: scalar_i16_unsigned_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setbe %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: cmpw %dx, %cx ; X86-NEXT: ja .LBB11_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: jmp .LBB11_3 ; X86-NEXT: .LBB11_1: -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: .LBB11_3: -; X86-NEXT: subl %esi, %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movzwl %si, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: jbe .LBB11_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB11_5: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -783,17 +796,18 @@ ; X64-LABEL: scalar_i16_signed_mem_reg: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpw %si, %cx -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %edx ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: cmovgl %esi, %eax -; X64-NEXT: cmovgl %ecx, %esi -; X64-NEXT: subl %eax, %esi -; X64-NEXT: movzwl %si, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %edx, %eax +; X64-NEXT: movl %esi, %edx +; X64-NEXT: cmovgl %ecx, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: shrl %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpw %si, %cx +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addl %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -801,25 +815,27 @@ ; X86-LABEL: scalar_i16_signed_mem_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: movzwl (%eax), %ecx +; X86-NEXT: cmpw %dx, %cx ; X86-NEXT: jg .LBB12_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: jmp .LBB12_3 ; X86-NEXT: .LBB12_1: -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: .LBB12_3: -; X86-NEXT: subl %esi, %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movzwl %si, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: jle .LBB12_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB12_5: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -839,18 +855,19 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i16_signed_reg_mem: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rsi), %eax -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpw %ax, %di -; X64-NEXT: setle %cl -; X64-NEXT: leal -1(%rcx,%rcx), %ecx -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: subl %edx, %eax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: cmpw %cx, %di +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: cmovgl %edi, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: shrl %edx +; X64-NEXT: movl %edx, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpw %cx, %di +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -860,23 +877,25 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: movzwl (%eax), %edx +; X86-NEXT: cmpw %dx, %cx ; X86-NEXT: jg .LBB13_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: jmp .LBB13_3 ; X86-NEXT: .LBB13_1: -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: .LBB13_3: -; X86-NEXT: subl %esi, %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movzwl %si, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: jle .LBB13_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB13_5: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -897,18 +916,19 @@ ; X64-LABEL: scalar_i16_signed_mem_mem: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpw %ax, %cx -; X64-NEXT: setle %dl -; X64-NEXT: leal -1(%rdx,%rdx), %edx -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: subl %esi, %eax -; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: shrl %eax -; X64-NEXT: imull %edx, %eax +; X64-NEXT: movzwl (%rsi), %edx +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: cmovgl %edx, %eax +; X64-NEXT: movl %edx, %esi +; X64-NEXT: cmovgl %ecx, %esi +; X64-NEXT: subl %eax, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: shrl %esi +; X64-NEXT: movl %esi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmpw %dx, %cx +; X64-NEXT: cmovlel %esi, %eax ; X64-NEXT: addl %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -919,23 +939,25 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: movzwl (%eax), %edx +; X86-NEXT: cmpw %dx, %cx ; X86-NEXT: jg .LBB14_1 ; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %esi ; X86-NEXT: jmp .LBB14_3 ; X86-NEXT: .LBB14_1: -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: .LBB14_3: -; X86-NEXT: subl %esi, %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: subl %eax, %esi +; X86-NEXT: movzwl %si, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: jle .LBB14_5 +; X86-NEXT: # %bb.4: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB14_5: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -962,40 +984,41 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil -; X64-NEXT: setg %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %esi, %edx -; X64-NEXT: cmovgl %edi, %eax +; X64-NEXT: cmpb %sil, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmovgl %esi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmovgl %edi, %ecx +; X64-NEXT: subb %al, %cl +; X64-NEXT: shrb %cl +; X64-NEXT: movzbl %cl, %edx ; X64-NEXT: negb %cl -; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al -; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %cl +; X64-NEXT: cmpb %sil, %dil +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB15_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB15_3 -; X86-NEXT: .LBB15_1: -; X86-NEXT: movb %al, %ah +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: movb %dl, %ah ; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB15_3: +; X86-NEXT: jg .LBB15_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %ah +; X86-NEXT: movb %dl, %al +; X86-NEXT: .LBB15_2: ; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: jle .LBB15_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negb %al +; X86-NEXT: .LBB15_4: ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -1012,40 +1035,41 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil -; X64-NEXT: seta %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmoval %esi, %edx -; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: cmpb %sil, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cmoval %esi, %eax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: cmoval %edi, %ecx +; X64-NEXT: subb %al, %cl +; X64-NEXT: shrb %cl +; X64-NEXT: movzbl %cl, %edx ; X64-NEXT: negb %cl -; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al -; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %cl +; X64-NEXT: cmpb %sil, %dil +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: cmovbel %edx, %eax ; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_unsigned_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: seta %dl -; X86-NEXT: ja .LBB16_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB16_3 -; X86-NEXT: .LBB16_1: -; X86-NEXT: movb %al, %ah +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: movb %dl, %ah ; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB16_3: +; X86-NEXT: ja .LBB16_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %ah +; X86-NEXT: movb %dl, %al +; X86-NEXT: .LBB16_2: ; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: jbe .LBB16_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negb %al +; X86-NEXT: .LBB16_4: ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %t3 = icmp ugt i8 %a1, %a2 @@ -1066,40 +1090,41 @@ ; X64: # %bb.0: ; X64-NEXT: movzbl (%rdi), %ecx ; X64-NEXT: cmpb %sil, %cl -; X64-NEXT: setg %dl -; X64-NEXT: movl %ecx, %edi -; X64-NEXT: cmovgl %esi, %edi ; X64-NEXT: movl %ecx, %eax -; X64-NEXT: cmovlel %esi, %eax +; X64-NEXT: cmovgl %esi, %eax +; X64-NEXT: movl %esi, %edx +; X64-NEXT: cmovgl %ecx, %edx +; X64-NEXT: subb %al, %dl +; X64-NEXT: shrb %dl +; X64-NEXT: movzbl %dl, %edi ; X64-NEXT: negb %dl -; X64-NEXT: orb $1, %dl -; X64-NEXT: subb %dil, %al -; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %dl +; X64-NEXT: cmpb %sil, %cl +; X64-NEXT: movzbl %dl, %eax +; X64-NEXT: cmovlel %edi, %eax ; X64-NEXT: addb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_mem_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB17_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB17_3 -; X86-NEXT: .LBB17_1: -; X86-NEXT: movb %al, %ah +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl (%eax), %ecx +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: movb %dl, %ah ; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB17_3: +; X86-NEXT: jg .LBB17_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %ah +; X86-NEXT: movb %dl, %al +; X86-NEXT: .LBB17_2: ; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: jle .LBB17_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negb %al +; X86-NEXT: .LBB17_4: ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %a1 = load i8, ptr %a1_addr @@ -1119,39 +1144,41 @@ ; X64: # %bb.0: ; X64-NEXT: movzbl (%rsi), %eax ; X64-NEXT: cmpb %al, %dil -; X64-NEXT: setg %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: negb %cl -; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al -; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %cl +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: cmovgl %eax, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: cmovgl %edi, %edx +; X64-NEXT: subb %cl, %dl +; X64-NEXT: shrb %dl +; X64-NEXT: movzbl %dl, %ecx +; X64-NEXT: negb %dl +; X64-NEXT: cmpb %al, %dil +; X64-NEXT: movzbl %dl, %eax +; X64-NEXT: cmovlel %ecx, %eax ; X64-NEXT: addb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_reg_mem: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB18_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB18_3 -; X86-NEXT: .LBB18_1: -; X86-NEXT: movb %al, %ah +; X86-NEXT: movzbl (%eax), %edx +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: movb %dl, %ah ; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB18_3: +; X86-NEXT: jg .LBB18_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %ah +; X86-NEXT: movb %dl, %al +; X86-NEXT: .LBB18_2: ; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: jle .LBB18_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negb %al +; X86-NEXT: .LBB18_4: ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %a2 = load i8, ptr %a2_addr @@ -1172,17 +1199,19 @@ ; X64-NEXT: movzbl (%rdi), %ecx ; X64-NEXT: movzbl (%rsi), %eax ; X64-NEXT: cmpb %al, %cl -; X64-NEXT: setg %dl -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: negb %dl -; X64-NEXT: orb $1, %dl -; X64-NEXT: subb %sil, %al -; X64-NEXT: shrb %al -; X64-NEXT: # kill: def $al killed $al killed $eax -; X64-NEXT: mulb %dl +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: cmovgl %eax, %edx +; X64-NEXT: movl %eax, %esi +; X64-NEXT: cmovgl %ecx, %esi +; X64-NEXT: subb %dl, %sil +; X64-NEXT: shrb %sil +; X64-NEXT: movzbl %sil, %edx +; X64-NEXT: negb %sil +; X64-NEXT: cmpb %al, %cl +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: cmovlel %edx, %eax ; X64-NEXT: addb %cl, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_mem_mem: @@ -1190,22 +1219,22 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB19_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB19_3 -; X86-NEXT: .LBB19_1: -; X86-NEXT: movb %al, %ah +; X86-NEXT: movzbl (%eax), %edx +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: movb %dl, %ah ; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB19_3: +; X86-NEXT: jg .LBB19_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %ah +; X86-NEXT: movb %dl, %al +; X86-NEXT: .LBB19_2: ; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl -; X86-NEXT: orb $1, %dl ; X86-NEXT: shrb %al -; X86-NEXT: mulb %dl +; X86-NEXT: cmpb %dl, %cl +; X86-NEXT: jle .LBB19_4 +; X86-NEXT: # %bb.3: +; X86-NEXT: negb %al +; X86-NEXT: .LBB19_4: ; X86-NEXT: addb %cl, %al ; X86-NEXT: retl %a1 = load i8, ptr %a1_addr diff --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll --- a/llvm/test/CodeGen/X86/pic.ll +++ b/llvm/test/CodeGen/X86/pic.ll @@ -1,10 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-I686 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnux32 -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-X32 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnux32 -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false -fast-isel -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-X32 -@ptr = external global ptr -@dst = external global i32 -@src = external global i32 +@ptr = external global ptr +@dst = external global i32 +@src = external global i32 define void @test0() nounwind { entry: @@ -12,20 +13,7 @@ %tmp.s = load i32, ptr @src store i32 %tmp.s, ptr @dst ret void - -; CHECK-LABEL: test0: -; CHECK-I686: calll .L0$pb -; CHECK-I686-NEXT: .L0$pb: -; CHECK-I686-NEXT: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb), -; CHECK-I686: movl dst@GOT(%eax), -; CHECK-I686: movl ptr@GOT(%eax), -; CHECK-I686: movl src@GOT(%eax), -; CHECK-I686: ret -; CHECK-X32-DAG: movl dst@GOTPCREL(%rip), -; CHECK-X32-DAG: movl ptr@GOTPCREL(%rip), -; CHECK-X32-DAG: movl src@GOTPCREL(%rip), -; CHECK-X32: retq + } @ptr2 = global ptr null @@ -38,20 +26,7 @@ %tmp.s = load i32, ptr @src2 store i32 %tmp.s, ptr @dst2 ret void - -; CHECK-LABEL: test1: -; CHECK-I686: calll .L1$pb -; CHECK-I686-NEXT: .L1$pb: -; CHECK-I686-NEXT: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax -; CHECK-I686: movl dst2@GOT(%eax), -; CHECK-I686: movl ptr2@GOT(%eax), -; CHECK-I686: movl src2@GOT(%eax), -; CHECK-I686: ret -; CHECK-X32-DAG: movl dst2@GOTPCREL(%rip), -; CHECK-X32-DAG: movl ptr2@GOTPCREL(%rip), -; CHECK-X32-DAG: movl src2@GOTPCREL(%rip), -; CHECK-X32: retq + } @@ -61,27 +36,10 @@ entry: %ptr = call ptr @malloc(i32 40) ret void -; CHECK-LABEL: test2: -; CHECK-I686: pushl %ebx -; CHECK-I686-NEXT: subl $8, %esp -; CHECK-I686-NEXT: calll .L2$pb -; CHECK-I686-NEXT: .L2$pb: -; CHECK-I686-NEXT: popl %ebx -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx -; CHECK-I686: movl $40, (%esp) -; CHECK-I686: calll malloc@PLT -; CHECK-I686: addl $8, %esp -; CHECK-I686: popl %ebx -; CHECK-I686: ret -; CHECK-X32: pushq %rax -; CHECK-X32: movl $40, %edi -; CHECK-X32: callq malloc@PLT -; CHECK-X32: popq %rax -; CHECK-X32: retq } -@pfoo = external global ptr +@pfoo = external global ptr define void @test3() nounwind { entry: @@ -90,17 +48,6 @@ %tmp1 = load ptr, ptr @pfoo call void(...) %tmp1() ret void -; CHECK-LABEL: test3: -; CHECK-I686: calll .L3$pb -; CHECK-I686-NEXT: .L3$pb: -; CHECK-I686: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]] -; CHECK-I686: calll afoo@PLT -; CHECK-I686: movl pfoo@GOT(%[[REG3]]), -; CHECK-I686: calll * -; CHECK-X32: callq afoo@PLT -; CHECK-X32: movl pfoo@GOTPCREL(%rip), -; CHECK-X32: callq * } declare ptr @afoo(...) @@ -109,12 +56,6 @@ entry: call void(...) @foo() ret void -; CHECK-LABEL: test4: -; CHECK-I686: calll .L4$pb -; CHECK-I686: popl %ebx -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx -; CHECK-I686: calll foo@PLT -; CHECK-X32: callq foo@PLT } @@ -131,22 +72,7 @@ %tmp.s = load i32, ptr @src6 store i32 %tmp.s, ptr @dst6 ret void - -; CHECK-LABEL: test5: -; CHECK-I686: calll .L5$pb -; CHECK-I686-NEXT: .L5$pb: -; CHECK-I686-NEXT: popl %eax -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax -; CHECK-I686: leal dst6@GOTOFF(%eax), %ecx -; CHECK-I686: movl %ecx, ptr6@GOTOFF(%eax) -; CHECK-I686: movl src6@GOTOFF(%eax), %ecx -; CHECK-I686: movl %ecx, dst6@GOTOFF(%eax) -; CHECK-I686: ret -; CHECK-X32: leal dst6(%rip), %eax -; CHECK-X32: movl %eax, ptr6(%rip) -; CHECK-X32: movl src6(%rip), %eax -; CHECK-X32: movl %eax, dst6(%rip) -; CHECK-X32: retq + } @@ -157,14 +83,7 @@ %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02 ret double %retval -; CHECK: .LCPI6_0: -; CHECK-LABEL: test6: -; CHECK-I686: calll .L6$pb -; CHECK-I686: .L6$pb: -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb), -; CHECK-I686: fldl .LCPI6_0@GOTOFF( -; CHECK-X32: .LCPI6_0(%rip), } @@ -211,39 +130,8 @@ bb12: tail call void(...) @foo6() ret void - -; CHECK-LABEL: test7: -; CHECK-I686: calll .L7$pb -; CHECK-I686: .L7$pb: -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb), -; CHECK-I686: .LJTI7_0@GOTOFF( -; CHECK-I686: jmpl * -; CHECK-X32: leal .LJTI7_0(%rip), %eax -; CHECK-X32: addl (%eax,%edi,4), %eax -; CHECK-X32: jmpq *%rax - -; CHECK: .p2align 2 -; CHECK-NEXT: .LJTI7_0: -; CHECK-I686: .long .LBB7_2@GOTOFF -; CHECK-I686: .long .LBB7_8@GOTOFF -; CHECK-I686: .long .LBB7_4@GOTOFF -; CHECK-I686: .long .LBB7_6@GOTOFF -; CHECK-I686: .long .LBB7_5@GOTOFF -; CHECK-I686: .long .LBB7_8@GOTOFF -; CHECK-I686: .long .LBB7_7@GOTOFF -; CHECK-X32: .long .LBB7_3-.LJTI7_0 -; CHECK-X32: .long .LBB7_3-.LJTI7_0 -; CHECK-X32: .long .LBB7_12-.LJTI7_0 -; CHECK-X32: .long .LBB7_8-.LJTI7_0 -; CHECK-X32: .long .LBB7_12-.LJTI7_0 -; CHECK-X32: .long .LBB7_10-.LJTI7_0 -; CHECK-X32: .long .LBB7_8-.LJTI7_0 -; CHECK-X32: .long .LBB7_9-.LJTI7_0 -; CHECK-X32: .long .LBB7_10-.LJTI7_0 -; CHECK-X32: .long .LBB7_9-.LJTI7_0 -; CHECK-X32: .long .LBB7_12-.LJTI7_0 -; CHECK-X32: .long .LBB7_14-.LJTI7_0 -; CHECK-X32: .long .LBB7_14-.LJTI7_0 + + } declare void @foo1(...) @@ -274,26 +162,6 @@ store i32 %tmp.s, ptr @tlsdstgd ret void -; CHECK-LABEL: test8: -; CHECK-I686: calll .L8$pb -; CHECK-I686-NEXT: .L8$pb: -; CHECK-I686-NEXT: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L8$pb), %ebx -; CHECK-I686-DAG: leal tlsdstgd@TLSGD(,%ebx), %eax -; CHECK-I686-DAG: calll ___tls_get_addr@PLT -; CHECK-I686-DAG: leal tlsptrgd@TLSGD(,%ebx), %eax -; CHECK-I686-DAG: calll ___tls_get_addr@PLT -; CHECK-I686-DAG: leal tlssrcgd@TLSGD(,%ebx), %eax -; CHECK-I686-DAG: calll ___tls_get_addr@PLT -; CHECK-X32-NOT: data16 -; CHECK-X32-DAG: leaq tlsdstgd@TLSGD(%rip), %rdi -; CHECK-X32-DAG: callq __tls_get_addr@PLT -; CHECK-X32-DAG: leaq tlsptrgd@TLSGD(%rip), %rdi -; CHECK-X32-DAG: callq __tls_get_addr@PLT -; CHECK-X32-DAG: leaq tlssrcgd@TLSGD(%rip), %rdi -; CHECK-X32-DAG: callq __tls_get_addr@PLT -; CHECK-I686: ret -; CHECK-X32: retq } define void @test9() nounwind { @@ -303,21 +171,6 @@ store i32 %tmp.s, ptr @tlsdstld ret void -; CHECK-LABEL: test9: -; CHECK-I686: calll .L9$pb -; CHECK-I686-NEXT: .L9$pb: -; CHECK-I686-NEXT: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L9$pb), %ebx -; CHECK-I686: leal tlsdstld@TLSLDM(%ebx), %eax -; CHECK-X32: leaq tlsdstld@TLSLD(%rip), %rdi -; CHECK-I686: calll ___tls_get_addr@PLT -; CHECK-X32: callq __tls_get_addr@PLT -; CHECK: leal tlsdstld@DTPOFF( -; CHECK: movl {{%.*}}, tlsptrld@DTPOFF( -; CHECK: movl tlssrcld@DTPOFF( -; CHECK: movl {{%.*}}, tlsdstld@DTPOFF( -; CHECK-I686: ret -; CHECK-X32: retq } define void @test10() nounwind { @@ -327,29 +180,6 @@ store i32 %tmp.s, ptr @tlsdstie ret void -; CHECK-LABEL: test10: -; CHECK-I686: calll .L10$pb -; CHECK-I686-NEXT: .L10$pb: -; CHECK-I686-NEXT: popl -; CHECK-I686: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L10$pb), -; CHECK-I686-DAG: movl tlsdstie@GOTNTPOFF( -; CHECK-I686-DAG: movl %gs:0, -; CHECK-X32-DAG: movl tlsdstie@GOTTPOFF(%rip), -; CHECK-X32-DAG: movl %fs:0, -; CHECK-I686: addl -; CHECK-X32: leal ({{%.*,%.*}}), -; CHECK-I686: movl tlsptrie@GOTNTPOFF( -; CHECK-X32: movl tlsptrie@GOTTPOFF(%rip), -; CHECK-I686: movl {{%.*}}, %gs:( -; CHECK-X32: movl {{%.*}}, ({{%.*,%.*}}) -; CHECK-I686: movl tlssrcie@GOTNTPOFF( -; CHECK-X32: movl tlssrcie@GOTTPOFF(%rip), -; CHECK-I686: movl %gs:( -; CHECK-X32: movl ({{%.*,%.*}}), -; CHECK-I686: movl {{%.*}}, %gs:( -; CHECK-X32: movl {{%.*}}, ({{%.*,%.*}}) -; CHECK-I686: ret -; CHECK-X32: retq } define void @test11() nounwind { @@ -359,17 +189,8 @@ store i32 %tmp.s, ptr @tlsdstle ret void -; CHECK-LABEL: test11: -; CHECK-I686: movl %gs:0, -; CHECK-X32: movl %fs:0, -; CHECK-I686: leal tlsdstle@NTPOFF( -; CHECK-X32: leal tlsdstle@TPOFF( -; CHECK-I686: movl {{%.*}}, %gs:tlsptrle@NTPOFF -; CHECK-X32: movl {{%.*}}, %fs:tlsptrle@TPOFF -; CHECK-I686: movl %gs:tlssrcle@NTPOFF, -; CHECK-X32: movl %fs:tlssrcle@TPOFF, -; CHECK-I686: movl {{%.*}}, %gs:tlsdstle@NTPOFF -; CHECK-X32: movl {{%.*}}, %fs:tlsdstle@TPOFF -; CHECK-I686: ret -; CHECK-X32: retq } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} +; CHECK-I686: {{.*}} +; CHECK-X32: {{.*}} diff --git a/llvm/test/CodeGen/X86/pr15309.ll b/llvm/test/CodeGen/X86/pr15309.ll --- a/llvm/test/CodeGen/X86/pr15309.ll +++ b/llvm/test/CodeGen/X86/pr15309.ll @@ -7,24 +7,35 @@ ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $20, %esp -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl 168(%ecx), %edx -; CHECK-NEXT: movl 172(%ecx), %esi -; CHECK-NEXT: movl 160(%ecx), %edi -; CHECK-NEXT: movl 164(%ecx), %ecx -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %edi, (%esp) -; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl 172(%ecx), %eax +; CHECK-NEXT: movl 160(%ecx), %esi +; CHECK-NEXT: movl 164(%ecx), %edi +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) -; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; CHECK-NEXT: addl $4, %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: js .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %esi +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: fildll (%esp) -; CHECK-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) -; CHECK-NEXT: shrl $31, %esi +; CHECK-NEXT: fadds (%esi) +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: js .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%esi,4) -; CHECK-NEXT: fstps 84(%eax) -; CHECK-NEXT: fstps 80(%eax) +; CHECK-NEXT: fadds (%ecx) +; CHECK-NEXT: fstps 84(%edx) +; CHECK-NEXT: fstps 80(%edx) ; CHECK-NEXT: addl $20, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/pr22338.ll b/llvm/test/CodeGen/X86/pr22338.ll --- a/llvm/test/CodeGen/X86/pr22338.ll +++ b/llvm/test/CodeGen/X86/pr22338.ll @@ -5,51 +5,54 @@ define i32 @fn(i32 %a0, i32 %a1) { ; X86-LABEL: fn: ; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebx, -8 -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $1, {{[0-9]+}}(%esp) -; X86-NEXT: sete %cl -; X86-NEXT: setne %al -; X86-NEXT: cmpl $1, {{[0-9]+}}(%esp) -; X86-NEXT: sete %dl -; X86-NEXT: negl %eax -; X86-NEXT: addb %cl, %cl -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: addb %dl, %dl -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shll %cl, %eax +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: setne %cl +; X86-NEXT: andl $1, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: leal (,%ecx,4), %eax +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: jne .LBB0_2 +; X86-NEXT: # %bb.1: # %entry +; X86-NEXT: movl %eax, %edx +; X86-NEXT: .LBB0_2: # %entry +; X86-NEXT: cmpl $1, %esi +; X86-NEXT: je .LBB0_4 +; X86-NEXT: # %bb.3: # %entry +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB0_1: # %bb1 +; X86-NEXT: .LBB0_4: # %bb1 ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: je .LBB0_1 -; X86-NEXT: # %bb.2: # %bb2 -; X86-NEXT: popl %ebx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB0_4 +; X86-NEXT: # %bb.5: # %bb2 +; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: fn: ; X64: # %bb.0: # %entry -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpl $1, %edi -; X64-NEXT: sete %cl -; X64-NEXT: setne %al +; X64-NEXT: setne %dl +; X64-NEXT: andl $1, %edx +; X64-NEXT: negl %edx +; X64-NEXT: leal (,%rdx,4), %eax +; X64-NEXT: cmpl $1, %edi +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: cmovnel %edx, %ecx ; X64-NEXT: cmpl $1, %esi -; X64-NEXT: sete %dl -; X64-NEXT: negl %eax -; X64-NEXT: addb %cl, %cl -; X64-NEXT: movl %eax, %esi -; X64-NEXT: shll %cl, %esi -; X64-NEXT: addb %dl, %dl -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: shll %cl, %eax +; X64-NEXT: cmovnel %edx, %eax ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_1: # %bb1 ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: testl %esi, %esi +; X64-NEXT: testl %ecx, %ecx ; X64-NEXT: je .LBB0_1 ; X64-NEXT: # %bb.2: # %bb2 ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr44396.ll b/llvm/test/CodeGen/X86/pr44396.ll --- a/llvm/test/CodeGen/X86/pr44396.ll +++ b/llvm/test/CodeGen/X86/pr44396.ll @@ -7,6 +7,8 @@ define double @c() nounwind { ; CHECK-LABEL: c: ; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: movl _b, %eax @@ -14,14 +16,16 @@ ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: movl _a+4, %edx ; CHECK-NEXT: movl _a, %esi +; CHECK-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edi +; CHECK-NEXT: leal 4(%edi), %ebx ; CHECK-NEXT: subl %eax, %esi ; CHECK-NEXT: sbbl %ecx, %edx ; CHECK-NEXT: setb %al +; CHECK-NEXT: cmovnsl %edi, %ebx ; CHECK-NEXT: movl %esi, (%esp) ; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) -; CHECK-NEXT: shrl $31, %edx ; CHECK-NEXT: fildll (%esp) -; CHECK-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%edx,4) +; CHECK-NEXT: fadds (%ebx) ; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldz @@ -31,6 +35,8 @@ ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl entry: %0 = load i32, ptr @b, align 4 diff --git a/llvm/test/CodeGen/X86/pr47482.ll b/llvm/test/CodeGen/X86/pr47482.ll --- a/llvm/test/CodeGen/X86/pr47482.ll +++ b/llvm/test/CodeGen/X86/pr47482.ll @@ -10,17 +10,17 @@ ; CHECK-NEXT: movl a(%rip), %eax ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl (%rdi), %ecx +; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: orl $2, %edi ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: sete %cl -; CHECK-NEXT: addl %ecx, %ecx -; CHECK-NEXT: orl (%rdi), %ecx +; CHECK-NEXT: cmovnel %ecx, %edi ; CHECK-NEXT: movl $0, (%rsi) ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: shll $8, %eax ; CHECK-NEXT: bextrl %eax, f(%rip), %eax -; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: movl %eax, (%rdx) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -69,12 +69,12 @@ ; X86-AVX512F-WIN-NEXT: subl $8, %esp ; X86-AVX512F-WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX512F-WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 ; X86-AVX512F-WIN-NEXT: xorl %edx, %edx ; X86-AVX512F-WIN-NEXT: vucomiss %xmm0, %xmm1 ; X86-AVX512F-WIN-NEXT: setbe %dl ; X86-AVX512F-WIN-NEXT: kmovw %edx, %k1 -; X86-AVX512F-WIN-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k1} {z} -; X86-AVX512F-WIN-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; X86-AVX512F-WIN-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} ; X86-AVX512F-WIN-NEXT: vmovss %xmm0, (%esp) ; X86-AVX512F-WIN-NEXT: flds (%esp) ; X86-AVX512F-WIN-NEXT: fisttpll (%esp) @@ -90,12 +90,12 @@ ; X86-AVX512F-LIN-NEXT: subl $12, %esp ; X86-AVX512F-LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX512F-LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 ; X86-AVX512F-LIN-NEXT: xorl %edx, %edx ; X86-AVX512F-LIN-NEXT: vucomiss %xmm0, %xmm1 ; X86-AVX512F-LIN-NEXT: setbe %dl ; X86-AVX512F-LIN-NEXT: kmovw %edx, %k1 -; X86-AVX512F-LIN-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k1} {z} -; X86-AVX512F-LIN-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; X86-AVX512F-LIN-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1} ; X86-AVX512F-LIN-NEXT: vmovss %xmm0, (%esp) ; X86-AVX512F-LIN-NEXT: flds (%esp) ; X86-AVX512F-LIN-NEXT: fisttpll (%esp) @@ -114,11 +114,10 @@ ; X86-SSE3-WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE3-WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-SSE3-WIN-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE3-WIN-NEXT: jbe LBB0_2 +; X86-SSE3-WIN-NEXT: ja LBB0_2 ; X86-SSE3-WIN-NEXT: # %bb.1: -; X86-SSE3-WIN-NEXT: xorps %xmm1, %xmm1 -; X86-SSE3-WIN-NEXT: LBB0_2: ; X86-SSE3-WIN-NEXT: subss %xmm1, %xmm0 +; X86-SSE3-WIN-NEXT: LBB0_2: ; X86-SSE3-WIN-NEXT: movss %xmm0, (%esp) ; X86-SSE3-WIN-NEXT: flds (%esp) ; X86-SSE3-WIN-NEXT: fisttpll (%esp) @@ -137,11 +136,10 @@ ; X86-SSE3-LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE3-LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-SSE3-LIN-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE3-LIN-NEXT: jbe .LBB0_2 +; X86-SSE3-LIN-NEXT: ja .LBB0_2 ; X86-SSE3-LIN-NEXT: # %bb.1: -; X86-SSE3-LIN-NEXT: xorps %xmm1, %xmm1 -; X86-SSE3-LIN-NEXT: .LBB0_2: ; X86-SSE3-LIN-NEXT: subss %xmm1, %xmm0 +; X86-SSE3-LIN-NEXT: .LBB0_2: ; X86-SSE3-LIN-NEXT: movss %xmm0, (%esp) ; X86-SSE3-LIN-NEXT: flds (%esp) ; X86-SSE3-LIN-NEXT: fisttpll (%esp) @@ -184,11 +182,10 @@ ; X86-SSE2-WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE2-WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-SSE2-WIN-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE2-WIN-NEXT: jbe LBB0_2 +; X86-SSE2-WIN-NEXT: ja LBB0_2 ; X86-SSE2-WIN-NEXT: # %bb.1: -; X86-SSE2-WIN-NEXT: xorps %xmm1, %xmm1 -; X86-SSE2-WIN-NEXT: LBB0_2: ; X86-SSE2-WIN-NEXT: subss %xmm1, %xmm0 +; X86-SSE2-WIN-NEXT: LBB0_2: ; X86-SSE2-WIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-WIN-NEXT: setbe %al ; X86-SSE2-WIN-NEXT: flds {{[0-9]+}}(%esp) @@ -213,11 +210,10 @@ ; X86-SSE2-LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE2-LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-SSE2-LIN-NEXT: ucomiss %xmm0, %xmm1 -; X86-SSE2-LIN-NEXT: jbe .LBB0_2 +; X86-SSE2-LIN-NEXT: ja .LBB0_2 ; X86-SSE2-LIN-NEXT: # %bb.1: -; X86-SSE2-LIN-NEXT: xorps %xmm1, %xmm1 -; X86-SSE2-LIN-NEXT: .LBB0_2: ; X86-SSE2-LIN-NEXT: subss %xmm1, %xmm0 +; X86-SSE2-LIN-NEXT: .LBB0_2: ; X86-SSE2-LIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-LIN-NEXT: setbe %al ; X86-SSE2-LIN-NEXT: flds {{[0-9]+}}(%esp) @@ -243,20 +239,21 @@ ; X87-WIN-NEXT: subl $16, %esp ; X87-WIN-NEXT: flds 8(%ebp) ; X87-WIN-NEXT: flds __real@5f000000 -; X87-WIN-NEXT: fucom %st(1) +; X87-WIN-NEXT: fld %st(1) +; X87-WIN-NEXT: fsub %st(1), %st +; X87-WIN-NEXT: fxch %st(1) +; X87-WIN-NEXT: fucomp %st(2) ; X87-WIN-NEXT: fnstsw %ax -; X87-WIN-NEXT: xorl %edx, %edx ; X87-WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-WIN-NEXT: sahf -; X87-WIN-NEXT: setbe %al -; X87-WIN-NEXT: fldz ; X87-WIN-NEXT: jbe LBB0_2 ; X87-WIN-NEXT: # %bb.1: -; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: fstp %st(0) ; X87-WIN-NEXT: fldz +; X87-WIN-NEXT: fxch %st(1) ; X87-WIN-NEXT: LBB0_2: -; X87-WIN-NEXT: fstp %st(0) -; X87-WIN-NEXT: fsubrp %st, %st(1) +; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: setbe %al ; X87-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -264,7 +261,7 @@ ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-WIN-NEXT: movb %al, %dl +; X87-WIN-NEXT: movzbl %al, %edx ; X87-WIN-NEXT: shll $31, %edx ; X87-WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -277,20 +274,21 @@ ; X87-LIN-NEXT: subl $20, %esp ; X87-LIN-NEXT: flds {{[0-9]+}}(%esp) ; X87-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X87-LIN-NEXT: fucom %st(1) +; X87-LIN-NEXT: fld %st(1) +; X87-LIN-NEXT: fsub %st(1), %st +; X87-LIN-NEXT: fxch %st(1) +; X87-LIN-NEXT: fucomp %st(2) ; X87-LIN-NEXT: fnstsw %ax -; X87-LIN-NEXT: xorl %edx, %edx ; X87-LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-LIN-NEXT: sahf -; X87-LIN-NEXT: setbe %al -; X87-LIN-NEXT: fldz ; X87-LIN-NEXT: jbe .LBB0_2 ; X87-LIN-NEXT: # %bb.1: -; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: fstp %st(0) ; X87-LIN-NEXT: fldz +; X87-LIN-NEXT: fxch %st(1) ; X87-LIN-NEXT: .LBB0_2: -; X87-LIN-NEXT: fstp %st(0) -; X87-LIN-NEXT: fsubrp %st, %st(1) +; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: setbe %al ; X87-LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -298,7 +296,7 @@ ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-LIN-NEXT: movb %al, %dl +; X87-LIN-NEXT: movzbl %al, %edx ; X87-LIN-NEXT: shll $31, %edx ; X87-LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -502,12 +500,12 @@ ; X86-AVX512F-WIN-NEXT: subl $8, %esp ; X86-AVX512F-WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX512F-WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-AVX512F-WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 ; X86-AVX512F-WIN-NEXT: xorl %edx, %edx ; X86-AVX512F-WIN-NEXT: vucomisd %xmm0, %xmm1 ; X86-AVX512F-WIN-NEXT: setbe %dl ; X86-AVX512F-WIN-NEXT: kmovw %edx, %k1 -; X86-AVX512F-WIN-NEXT: vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z} -; X86-AVX512F-WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; X86-AVX512F-WIN-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; X86-AVX512F-WIN-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX512F-WIN-NEXT: fldl (%esp) ; X86-AVX512F-WIN-NEXT: fisttpll (%esp) @@ -523,12 +521,12 @@ ; X86-AVX512F-LIN-NEXT: subl $12, %esp ; X86-AVX512F-LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX512F-LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-AVX512F-LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 ; X86-AVX512F-LIN-NEXT: xorl %edx, %edx ; X86-AVX512F-LIN-NEXT: vucomisd %xmm0, %xmm1 ; X86-AVX512F-LIN-NEXT: setbe %dl ; X86-AVX512F-LIN-NEXT: kmovw %edx, %k1 -; X86-AVX512F-LIN-NEXT: vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z} -; X86-AVX512F-LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; X86-AVX512F-LIN-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} ; X86-AVX512F-LIN-NEXT: vmovsd %xmm0, (%esp) ; X86-AVX512F-LIN-NEXT: fldl (%esp) ; X86-AVX512F-LIN-NEXT: fisttpll (%esp) @@ -547,11 +545,10 @@ ; X86-SSE3-WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE3-WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE3-WIN-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE3-WIN-NEXT: jbe LBB2_2 +; X86-SSE3-WIN-NEXT: ja LBB2_2 ; X86-SSE3-WIN-NEXT: # %bb.1: -; X86-SSE3-WIN-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE3-WIN-NEXT: LBB2_2: ; X86-SSE3-WIN-NEXT: subsd %xmm1, %xmm0 +; X86-SSE3-WIN-NEXT: LBB2_2: ; X86-SSE3-WIN-NEXT: movsd %xmm0, (%esp) ; X86-SSE3-WIN-NEXT: fldl (%esp) ; X86-SSE3-WIN-NEXT: fisttpll (%esp) @@ -570,11 +567,10 @@ ; X86-SSE3-LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE3-LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE3-LIN-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE3-LIN-NEXT: jbe .LBB2_2 +; X86-SSE3-LIN-NEXT: ja .LBB2_2 ; X86-SSE3-LIN-NEXT: # %bb.1: -; X86-SSE3-LIN-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE3-LIN-NEXT: .LBB2_2: ; X86-SSE3-LIN-NEXT: subsd %xmm1, %xmm0 +; X86-SSE3-LIN-NEXT: .LBB2_2: ; X86-SSE3-LIN-NEXT: movsd %xmm0, (%esp) ; X86-SSE3-LIN-NEXT: fldl (%esp) ; X86-SSE3-LIN-NEXT: fisttpll (%esp) @@ -617,11 +613,10 @@ ; X86-SSE2-WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE2-WIN-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE2-WIN-NEXT: jbe LBB2_2 +; X86-SSE2-WIN-NEXT: ja LBB2_2 ; X86-SSE2-WIN-NEXT: # %bb.1: -; X86-SSE2-WIN-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE2-WIN-NEXT: LBB2_2: ; X86-SSE2-WIN-NEXT: subsd %xmm1, %xmm0 +; X86-SSE2-WIN-NEXT: LBB2_2: ; X86-SSE2-WIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-WIN-NEXT: setbe %al ; X86-SSE2-WIN-NEXT: fldl {{[0-9]+}}(%esp) @@ -646,11 +641,10 @@ ; X86-SSE2-LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE2-LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X86-SSE2-LIN-NEXT: ucomisd %xmm0, %xmm1 -; X86-SSE2-LIN-NEXT: jbe .LBB2_2 +; X86-SSE2-LIN-NEXT: ja .LBB2_2 ; X86-SSE2-LIN-NEXT: # %bb.1: -; X86-SSE2-LIN-NEXT: xorpd %xmm1, %xmm1 -; X86-SSE2-LIN-NEXT: .LBB2_2: ; X86-SSE2-LIN-NEXT: subsd %xmm1, %xmm0 +; X86-SSE2-LIN-NEXT: .LBB2_2: ; X86-SSE2-LIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; X86-SSE2-LIN-NEXT: setbe %al ; X86-SSE2-LIN-NEXT: fldl {{[0-9]+}}(%esp) @@ -676,20 +670,21 @@ ; X87-WIN-NEXT: subl $16, %esp ; X87-WIN-NEXT: fldl 8(%ebp) ; X87-WIN-NEXT: flds __real@5f000000 -; X87-WIN-NEXT: fucom %st(1) +; X87-WIN-NEXT: fld %st(1) +; X87-WIN-NEXT: fsub %st(1), %st +; X87-WIN-NEXT: fxch %st(1) +; X87-WIN-NEXT: fucomp %st(2) ; X87-WIN-NEXT: fnstsw %ax -; X87-WIN-NEXT: xorl %edx, %edx ; X87-WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-WIN-NEXT: sahf -; X87-WIN-NEXT: setbe %al -; X87-WIN-NEXT: fldz ; X87-WIN-NEXT: jbe LBB2_2 ; X87-WIN-NEXT: # %bb.1: -; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: fstp %st(0) ; X87-WIN-NEXT: fldz +; X87-WIN-NEXT: fxch %st(1) ; X87-WIN-NEXT: LBB2_2: -; X87-WIN-NEXT: fstp %st(0) -; X87-WIN-NEXT: fsubrp %st, %st(1) +; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: setbe %al ; X87-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -697,7 +692,7 @@ ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-WIN-NEXT: movb %al, %dl +; X87-WIN-NEXT: movzbl %al, %edx ; X87-WIN-NEXT: shll $31, %edx ; X87-WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -710,20 +705,21 @@ ; X87-LIN-NEXT: subl $20, %esp ; X87-LIN-NEXT: fldl {{[0-9]+}}(%esp) ; X87-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X87-LIN-NEXT: fucom %st(1) +; X87-LIN-NEXT: fld %st(1) +; X87-LIN-NEXT: fsub %st(1), %st +; X87-LIN-NEXT: fxch %st(1) +; X87-LIN-NEXT: fucomp %st(2) ; X87-LIN-NEXT: fnstsw %ax -; X87-LIN-NEXT: xorl %edx, %edx ; X87-LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-LIN-NEXT: sahf -; X87-LIN-NEXT: setbe %al -; X87-LIN-NEXT: fldz ; X87-LIN-NEXT: jbe .LBB2_2 ; X87-LIN-NEXT: # %bb.1: -; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: fstp %st(0) ; X87-LIN-NEXT: fldz +; X87-LIN-NEXT: fxch %st(1) ; X87-LIN-NEXT: .LBB2_2: -; X87-LIN-NEXT: fstp %st(0) -; X87-LIN-NEXT: fsubrp %st, %st(1) +; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: setbe %al ; X87-LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -731,7 +727,7 @@ ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-LIN-NEXT: movb %al, %dl +; X87-LIN-NEXT: movzbl %al, %edx ; X87-LIN-NEXT: shll $31, %edx ; X87-LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -913,12 +909,14 @@ ; X86-AVX512-WIN-NEXT: subl $16, %esp ; X86-AVX512-WIN-NEXT: fldt 8(%ebp) ; X86-AVX512-WIN-NEXT: flds __real@5f000000 +; X86-AVX512-WIN-NEXT: fld %st(1) +; X86-AVX512-WIN-NEXT: fsub %st(1), %st ; X86-AVX512-WIN-NEXT: xorl %edx, %edx -; X86-AVX512-WIN-NEXT: fucomi %st(1), %st -; X86-AVX512-WIN-NEXT: fldz +; X86-AVX512-WIN-NEXT: fxch %st(1) +; X86-AVX512-WIN-NEXT: fucompi %st(2), %st +; X86-AVX512-WIN-NEXT: fxch %st(1) ; X86-AVX512-WIN-NEXT: fcmovbe %st(1), %st ; X86-AVX512-WIN-NEXT: fstp %st(1) -; X86-AVX512-WIN-NEXT: fsubrp %st, %st(1) ; X86-AVX512-WIN-NEXT: fisttpll (%esp) ; X86-AVX512-WIN-NEXT: setbe %dl ; X86-AVX512-WIN-NEXT: shll $31, %edx @@ -933,12 +931,14 @@ ; X86-AVX512-LIN-NEXT: subl $12, %esp ; X86-AVX512-LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X86-AVX512-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-AVX512-LIN-NEXT: fld %st(1) +; X86-AVX512-LIN-NEXT: fsub %st(1), %st ; X86-AVX512-LIN-NEXT: xorl %edx, %edx -; X86-AVX512-LIN-NEXT: fucomi %st(1), %st -; X86-AVX512-LIN-NEXT: fldz +; X86-AVX512-LIN-NEXT: fxch %st(1) +; X86-AVX512-LIN-NEXT: fucompi %st(2), %st +; X86-AVX512-LIN-NEXT: fxch %st(1) ; X86-AVX512-LIN-NEXT: fcmovbe %st(1), %st ; X86-AVX512-LIN-NEXT: fstp %st(1) -; X86-AVX512-LIN-NEXT: fsubrp %st, %st(1) ; X86-AVX512-LIN-NEXT: fisttpll (%esp) ; X86-AVX512-LIN-NEXT: setbe %dl ; X86-AVX512-LIN-NEXT: shll $31, %edx @@ -989,12 +989,14 @@ ; X86-SSE3-WIN-NEXT: subl $16, %esp ; X86-SSE3-WIN-NEXT: fldt 8(%ebp) ; X86-SSE3-WIN-NEXT: flds __real@5f000000 +; X86-SSE3-WIN-NEXT: fld %st(1) +; X86-SSE3-WIN-NEXT: fsub %st(1), %st ; X86-SSE3-WIN-NEXT: xorl %edx, %edx -; X86-SSE3-WIN-NEXT: fucomi %st(1), %st -; X86-SSE3-WIN-NEXT: fldz +; X86-SSE3-WIN-NEXT: fxch %st(1) +; X86-SSE3-WIN-NEXT: fucompi %st(2), %st +; X86-SSE3-WIN-NEXT: fxch %st(1) ; X86-SSE3-WIN-NEXT: fcmovbe %st(1), %st ; X86-SSE3-WIN-NEXT: fstp %st(1) -; X86-SSE3-WIN-NEXT: fsubrp %st, %st(1) ; X86-SSE3-WIN-NEXT: fisttpll (%esp) ; X86-SSE3-WIN-NEXT: setbe %dl ; X86-SSE3-WIN-NEXT: shll $31, %edx @@ -1009,12 +1011,14 @@ ; X86-SSE3-LIN-NEXT: subl $12, %esp ; X86-SSE3-LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X86-SSE3-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE3-LIN-NEXT: fld %st(1) +; X86-SSE3-LIN-NEXT: fsub %st(1), %st ; X86-SSE3-LIN-NEXT: xorl %edx, %edx -; X86-SSE3-LIN-NEXT: fucomi %st(1), %st -; X86-SSE3-LIN-NEXT: fldz +; X86-SSE3-LIN-NEXT: fxch %st(1) +; X86-SSE3-LIN-NEXT: fucompi %st(2), %st +; X86-SSE3-LIN-NEXT: fxch %st(1) ; X86-SSE3-LIN-NEXT: fcmovbe %st(1), %st ; X86-SSE3-LIN-NEXT: fstp %st(1) -; X86-SSE3-LIN-NEXT: fsubrp %st, %st(1) ; X86-SSE3-LIN-NEXT: fisttpll (%esp) ; X86-SSE3-LIN-NEXT: setbe %dl ; X86-SSE3-LIN-NEXT: shll $31, %edx @@ -1065,13 +1069,15 @@ ; X86-SSE2-WIN-NEXT: subl $32, %esp ; X86-SSE2-WIN-NEXT: fldt 8(%ebp) ; X86-SSE2-WIN-NEXT: flds __real@5f000000 +; X86-SSE2-WIN-NEXT: fld %st(1) +; X86-SSE2-WIN-NEXT: fsub %st(1), %st ; X86-SSE2-WIN-NEXT: xorl %edx, %edx -; X86-SSE2-WIN-NEXT: fucomi %st(1), %st -; X86-SSE2-WIN-NEXT: setbe %dl -; X86-SSE2-WIN-NEXT: fldz +; X86-SSE2-WIN-NEXT: fxch %st(1) +; X86-SSE2-WIN-NEXT: fucompi %st(2), %st +; X86-SSE2-WIN-NEXT: fxch %st(1) ; X86-SSE2-WIN-NEXT: fcmovbe %st(1), %st ; X86-SSE2-WIN-NEXT: fstp %st(1) -; X86-SSE2-WIN-NEXT: fsubrp %st, %st(1) +; X86-SSE2-WIN-NEXT: setbe %dl ; X86-SSE2-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE2-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE2-WIN-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1091,13 +1097,15 @@ ; X86-SSE2-LIN-NEXT: subl $20, %esp ; X86-SSE2-LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X86-SSE2-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X86-SSE2-LIN-NEXT: fld %st(1) +; X86-SSE2-LIN-NEXT: fsub %st(1), %st ; X86-SSE2-LIN-NEXT: xorl %edx, %edx -; X86-SSE2-LIN-NEXT: fucomi %st(1), %st -; X86-SSE2-LIN-NEXT: setbe %dl -; X86-SSE2-LIN-NEXT: fldz +; X86-SSE2-LIN-NEXT: fxch %st(1) +; X86-SSE2-LIN-NEXT: fucompi %st(2), %st +; X86-SSE2-LIN-NEXT: fxch %st(1) ; X86-SSE2-LIN-NEXT: fcmovbe %st(1), %st ; X86-SSE2-LIN-NEXT: fstp %st(1) -; X86-SSE2-LIN-NEXT: fsubrp %st, %st(1) +; X86-SSE2-LIN-NEXT: setbe %dl ; X86-SSE2-LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE2-LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE2-LIN-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1165,20 +1173,21 @@ ; X87-WIN-NEXT: subl $32, %esp ; X87-WIN-NEXT: fldt 8(%ebp) ; X87-WIN-NEXT: flds __real@5f000000 -; X87-WIN-NEXT: fucom %st(1) +; X87-WIN-NEXT: fld %st(1) +; X87-WIN-NEXT: fsub %st(1), %st +; X87-WIN-NEXT: fxch %st(1) +; X87-WIN-NEXT: fucomp %st(2) ; X87-WIN-NEXT: fnstsw %ax -; X87-WIN-NEXT: xorl %edx, %edx ; X87-WIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-WIN-NEXT: sahf -; X87-WIN-NEXT: setbe %al -; X87-WIN-NEXT: fldz ; X87-WIN-NEXT: jbe LBB4_2 ; X87-WIN-NEXT: # %bb.1: -; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: fstp %st(0) ; X87-WIN-NEXT: fldz +; X87-WIN-NEXT: fxch %st(1) ; X87-WIN-NEXT: LBB4_2: -; X87-WIN-NEXT: fstp %st(0) -; X87-WIN-NEXT: fsubrp %st, %st(1) +; X87-WIN-NEXT: fstp %st(1) +; X87-WIN-NEXT: setbe %al ; X87-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-WIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1186,7 +1195,7 @@ ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-WIN-NEXT: movb %al, %dl +; X87-WIN-NEXT: movzbl %al, %edx ; X87-WIN-NEXT: shll $31, %edx ; X87-WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1199,20 +1208,21 @@ ; X87-LIN-NEXT: subl $20, %esp ; X87-LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X87-LIN-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; X87-LIN-NEXT: fucom %st(1) +; X87-LIN-NEXT: fld %st(1) +; X87-LIN-NEXT: fsub %st(1), %st +; X87-LIN-NEXT: fxch %st(1) +; X87-LIN-NEXT: fucomp %st(2) ; X87-LIN-NEXT: fnstsw %ax -; X87-LIN-NEXT: xorl %edx, %edx ; X87-LIN-NEXT: # kill: def $ah killed $ah killed $ax ; X87-LIN-NEXT: sahf -; X87-LIN-NEXT: setbe %al -; X87-LIN-NEXT: fldz ; X87-LIN-NEXT: jbe .LBB4_2 ; X87-LIN-NEXT: # %bb.1: -; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: fstp %st(0) ; X87-LIN-NEXT: fldz +; X87-LIN-NEXT: fxch %st(1) ; X87-LIN-NEXT: .LBB4_2: -; X87-LIN-NEXT: fstp %st(0) -; X87-LIN-NEXT: fsubrp %st, %st(1) +; X87-LIN-NEXT: fstp %st(1) +; X87-LIN-NEXT: setbe %al ; X87-LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X87-LIN-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1220,7 +1230,7 @@ ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-LIN-NEXT: movb %al, %dl +; X87-LIN-NEXT: movzbl %al, %edx ; X87-LIN-NEXT: shll $31, %edx ; X87-LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X87-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll --- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll +++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll @@ -328,12 +328,14 @@ ; AVX512F_32-NEXT: movl %esp, %ebp ; AVX512F_32-NEXT: andl $-8, %esp ; AVX512F_32-NEXT: subl $16, %esp -; AVX512F_32-NEXT: movl 12(%ebp), %eax ; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX512F_32-NEXT: shrl $31, %eax +; AVX512F_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX512F_32-NEXT: leal 4(%eax), %ecx +; AVX512F_32-NEXT: cmpl $0, 12(%ebp) +; AVX512F_32-NEXT: cmovnsl %eax, %ecx ; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512F_32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX512F_32-NEXT: fadds (%ecx) ; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX512F_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F_32-NEXT: vmovss %xmm0, (%esp) @@ -348,12 +350,14 @@ ; SSE2_32-NEXT: movl %esp, %ebp ; SSE2_32-NEXT: andl $-8, %esp ; SSE2_32-NEXT: subl $16, %esp -; SSE2_32-NEXT: movl 12(%ebp), %eax ; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE2_32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; SSE2_32-NEXT: shrl $31, %eax +; SSE2_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE2_32-NEXT: leal 4(%eax), %ecx +; SSE2_32-NEXT: cmpl $0, 12(%ebp) +; SSE2_32-NEXT: cmovnsl %eax, %ecx ; SSE2_32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE2_32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE2_32-NEXT: fadds (%ecx) ; SSE2_32-NEXT: fstps {{[0-9]+}}(%esp) ; SSE2_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2_32-NEXT: movss %xmm0, (%esp) @@ -390,9 +394,12 @@ ; SSE1_32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; SSE1_32-NEXT: fldl {{[0-9]+}}(%esp) ; SSE1_32-NEXT: fstpl {{[0-9]+}}(%esp) -; SSE1_32-NEXT: shrl $31, %ecx +; SSE1_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE1_32-NEXT: leal 4(%eax), %edx +; SSE1_32-NEXT: testl %ecx, %ecx +; SSE1_32-NEXT: cmovnsl %eax, %edx ; SSE1_32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE1_32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; SSE1_32-NEXT: fadds (%edx) ; SSE1_32-NEXT: fstps {{[0-9]+}}(%esp) ; SSE1_32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE1_32-NEXT: movss %xmm0, (%esp) @@ -411,9 +418,14 @@ ; X87-NEXT: movl 12(%ebp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB6_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB6_2: ; X87-NEXT: fildll {{[0-9]+}}(%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%eax) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: movl %ebp, %esp @@ -699,9 +711,12 @@ ; SSE1_32-NEXT: movl 12(%ebp), %ecx ; SSE1_32-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; SSE1_32-NEXT: movl %eax, (%esp) -; SSE1_32-NEXT: shrl $31, %ecx +; SSE1_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE1_32-NEXT: leal 4(%eax), %edx +; SSE1_32-NEXT: testl %ecx, %ecx +; SSE1_32-NEXT: cmovnsl %eax, %edx ; SSE1_32-NEXT: fildll (%esp) -; SSE1_32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; SSE1_32-NEXT: fadds (%edx) ; SSE1_32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE1_32-NEXT: fldl {{[0-9]+}}(%esp) ; SSE1_32-NEXT: movl %ebp, %esp @@ -718,9 +733,14 @@ ; X87-NEXT: movl 12(%ebp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB9_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB9_2: ; X87-NEXT: fildll (%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%eax) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: movl %ebp, %esp @@ -820,9 +840,12 @@ ; SSE1_32-NEXT: movl 12(%ebp), %ecx ; SSE1_32-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; SSE1_32-NEXT: movl %eax, (%esp) -; SSE1_32-NEXT: shrl $31, %ecx +; SSE1_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE1_32-NEXT: leal 4(%eax), %edx +; SSE1_32-NEXT: testl %ecx, %ecx +; SSE1_32-NEXT: cmovnsl %eax, %edx ; SSE1_32-NEXT: fildll (%esp) -; SSE1_32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; SSE1_32-NEXT: fadds (%edx) ; SSE1_32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE1_32-NEXT: fldl {{[0-9]+}}(%esp) ; SSE1_32-NEXT: movl %ebp, %esp @@ -839,9 +862,14 @@ ; X87-NEXT: movl 12(%ebp), %ecx ; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: shrl $31, %ecx +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB10_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB10_2: ; X87-NEXT: fildll (%esp) -; X87-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; X87-NEXT: fadds (%eax) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: movl %ebp, %esp @@ -1064,22 +1092,25 @@ } define x86_fp80 @u64_to_x(i64 %a) nounwind { -; CHECK32-LABEL: u64_to_x: -; CHECK32: # %bb.0: -; CHECK32-NEXT: pushl %ebp -; CHECK32-NEXT: movl %esp, %ebp -; CHECK32-NEXT: andl $-8, %esp -; CHECK32-NEXT: subl $8, %esp -; CHECK32-NEXT: movl 8(%ebp), %eax -; CHECK32-NEXT: movl 12(%ebp), %ecx -; CHECK32-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK32-NEXT: movl %eax, (%esp) -; CHECK32-NEXT: shrl $31, %ecx -; CHECK32-NEXT: fildll (%esp) -; CHECK32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) -; CHECK32-NEXT: movl %ebp, %esp -; CHECK32-NEXT: popl %ebp -; CHECK32-NEXT: retl +; AVX512_32-LABEL: u64_to_x: +; AVX512_32: # %bb.0: +; AVX512_32-NEXT: pushl %ebp +; AVX512_32-NEXT: movl %esp, %ebp +; AVX512_32-NEXT: andl $-8, %esp +; AVX512_32-NEXT: subl $8, %esp +; AVX512_32-NEXT: movl 8(%ebp), %eax +; AVX512_32-NEXT: movl 12(%ebp), %ecx +; AVX512_32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; AVX512_32-NEXT: movl %eax, (%esp) +; AVX512_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX512_32-NEXT: leal 4(%eax), %edx +; AVX512_32-NEXT: testl %ecx, %ecx +; AVX512_32-NEXT: cmovnsl %eax, %edx +; AVX512_32-NEXT: fildll (%esp) +; AVX512_32-NEXT: fadds (%edx) +; AVX512_32-NEXT: movl %ebp, %esp +; AVX512_32-NEXT: popl %ebp +; AVX512_32-NEXT: retl ; ; CHECK64-LABEL: u64_to_x: ; CHECK64: # %bb.0: @@ -1090,6 +1121,48 @@ ; CHECK64-NEXT: fildll -{{[0-9]+}}(%rsp) ; CHECK64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%rax,4) ; CHECK64-NEXT: retq +; +; SSE_32-LABEL: u64_to_x: +; SSE_32: # %bb.0: +; SSE_32-NEXT: pushl %ebp +; SSE_32-NEXT: movl %esp, %ebp +; SSE_32-NEXT: andl $-8, %esp +; SSE_32-NEXT: subl $8, %esp +; SSE_32-NEXT: movl 8(%ebp), %eax +; SSE_32-NEXT: movl 12(%ebp), %ecx +; SSE_32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; SSE_32-NEXT: movl %eax, (%esp) +; SSE_32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE_32-NEXT: leal 4(%eax), %edx +; SSE_32-NEXT: testl %ecx, %ecx +; SSE_32-NEXT: cmovnsl %eax, %edx +; SSE_32-NEXT: fildll (%esp) +; SSE_32-NEXT: fadds (%edx) +; SSE_32-NEXT: movl %ebp, %esp +; SSE_32-NEXT: popl %ebp +; SSE_32-NEXT: retl +; +; X87-LABEL: u64_to_x: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $8, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl 12(%ebp), %ecx +; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: jns .LBB13_2 +; X87-NEXT: # %bb.1: +; X87-NEXT: addl $4, %eax +; X87-NEXT: .LBB13_2: +; X87-NEXT: fildll (%esp) +; X87-NEXT: fadds (%eax) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: retl %r = uitofp i64 %a to x86_fp80 ret x86_fp80 %r } diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll --- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll +++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll @@ -13,25 +13,28 @@ define float @icmp_select_fp_constants(i32 %x) nounwind readnone { ; X86-LABEL: icmp_select_fp_constants: ; X86: # %bb.0: -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-NEXT: leal 4(%eax), %ecx ; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X86-NEXT: sete %al -; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-NEXT: cmovnel %eax, %ecx +; X86-NEXT: flds (%ecx) ; X86-NEXT: retl ; ; X64-SSE-LABEL: icmp_select_fp_constants: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: xorl %eax, %eax +; X64-SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X64-SSE-NEXT: leaq 4(%rax), %rcx ; X64-SSE-NEXT: testl %edi, %edi -; X64-SSE-NEXT: sete %al +; X64-SSE-NEXT: cmovneq %rax, %rcx ; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: icmp_select_fp_constants: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: xorl %eax, %eax +; X64-AVX-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X64-AVX-NEXT: leaq 4(%rax), %rcx ; X64-AVX-NEXT: testl %edi, %edi -; X64-AVX-NEXT: sete %al +; X64-AVX-NEXT: cmovneq %rax, %rcx ; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: retq %c = icmp eq i32 %x, 0 @@ -43,34 +46,43 @@ ; X86-SSE-LABEL: fcmp_select_fp_constants: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: cmpneqss {{[0-9]+}}(%esp), %xmm0 -; X86-SSE-NEXT: movd %xmm0, %eax -; X86-SSE-NEXT: andl $1, %eax -; X86-SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-SSE-NEXT: leal 4(%eax), %ecx +; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: cmovnel %ecx, %eax +; X86-SSE-NEXT: cmovpl %ecx, %eax +; X86-SSE-NEXT: flds (%eax) ; X86-SSE-NEXT: retl ; ; X86-AVX2-LABEL: fcmp_select_fp_constants: ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX2-NEXT: vcmpneqss {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovd %xmm0, %eax -; X86-AVX2-NEXT: andl $1, %eax -; X86-AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-AVX2-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-AVX2-NEXT: leal 4(%eax), %ecx +; X86-AVX2-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-AVX2-NEXT: cmovnel %ecx, %eax +; X86-AVX2-NEXT: cmovpl %ecx, %eax +; X86-AVX2-NEXT: flds (%eax) ; X86-AVX2-NEXT: retl ; ; X86-AVX512F-LABEL: fcmp_select_fp_constants: ; X86-AVX512F: # %bb.0: ; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX512F-NEXT: vcmpneqss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %k0 -; X86-AVX512F-NEXT: kmovw %k0, %eax -; X86-AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-AVX512F-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-AVX512F-NEXT: leal 4(%eax), %ecx +; X86-AVX512F-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-AVX512F-NEXT: cmovnel %ecx, %eax +; X86-AVX512F-NEXT: cmovpl %ecx, %eax +; X86-AVX512F-NEXT: flds (%eax) ; X86-AVX512F-NEXT: retl ; ; X64-SSE-LABEL: fcmp_select_fp_constants: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cmpneqss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $1, %eax +; X64-SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X64-SSE-NEXT: leaq 4(%rax), %rcx +; X64-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: cmovneq %rcx, %rax +; X64-SSE-NEXT: cmovpq %rcx, %rax ; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -137,36 +137,40 @@ define float @test3(i32 %x) nounwind readnone { ; GENERIC-LABEL: test3: ; GENERIC: ## %bb.0: ## %entry -; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; GENERIC-NEXT: leaq 4(%rax), %rcx ; GENERIC-NEXT: testl %edi, %edi -; GENERIC-NEXT: sete %al -; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; GENERIC-NEXT: cmovneq %rax, %rcx ; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test3: ; ATOM: ## %bb.0: ## %entry -; ATOM-NEXT: xorl %eax, %eax -; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; ATOM-NEXT: testl %edi, %edi -; ATOM-NEXT: sete %al +; ATOM-NEXT: leaq 4(%rax), %rcx +; ATOM-NEXT: cmovneq %rax, %rcx ; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test3: ; ATHLON: ## %bb.0: ## %entry -; ATHLON-NEXT: xorl %eax, %eax +; ATHLON-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; ATHLON-NEXT: leal 4(%eax), %ecx ; ATHLON-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; ATHLON-NEXT: sete %al -; ATHLON-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; ATHLON-NEXT: cmovnel %eax, %ecx +; ATHLON-NEXT: flds (%ecx) ; ATHLON-NEXT: retl ; ; MCU-LABEL: test3: ; MCU: # %bb.0: # %entry -; MCU-NEXT: xorl %ecx, %ecx +; MCU-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx ; MCU-NEXT: testl %eax, %eax -; MCU-NEXT: sete %cl -; MCU-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(,%ecx,4) +; MCU-NEXT: jne .LBB2_2 +; MCU-NEXT: # %bb.1: +; MCU-NEXT: addl $4, %ecx +; MCU-NEXT: .LBB2_2: # %entry +; MCU-NEXT: flds (%ecx) ; MCU-NEXT: retl entry: %0 = icmp eq i32 %x, 0 @@ -188,12 +192,12 @@ ; ATHLON: ## %bb.0: ## %entry ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax ; ATHLON-NEXT: fldl {{[0-9]+}}(%esp) +; ATHLON-NEXT: leal 4(%eax), %ecx ; ATHLON-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; ATHLON-NEXT: xorl %ecx, %ecx ; ATHLON-NEXT: fucompi %st(1), %st ; ATHLON-NEXT: fstp %st(0) -; ATHLON-NEXT: seta %cl -; ATHLON-NEXT: movsbl (%eax,%ecx,4), %eax +; ATHLON-NEXT: cmovbel %eax, %ecx +; ATHLON-NEXT: movsbl (%ecx), %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test4: @@ -203,11 +207,13 @@ ; MCU-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; MCU-NEXT: fucompp ; MCU-NEXT: fnstsw %ax -; MCU-NEXT: xorl %edx, %edx ; MCU-NEXT: # kill: def $ah killed $ah killed $ax ; MCU-NEXT: sahf -; MCU-NEXT: seta %dl -; MCU-NEXT: movzbl (%ecx,%edx,4), %eax +; MCU-NEXT: jbe .LBB3_2 +; MCU-NEXT: # %bb.1: +; MCU-NEXT: addl $4, %ecx +; MCU-NEXT: .LBB3_2: # %entry +; MCU-NEXT: movzbl (%ecx), %eax ; MCU-NEXT: retl entry: %0 = fcmp olt double %F, 4.200000e+01 @@ -393,39 +399,40 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; GENERIC-LABEL: test7: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax +; GENERIC-NEXT: leaq 16(%rax), %rcx ; GENERIC-NEXT: testl %edi, %edi -; GENERIC-NEXT: setns %al -; GENERIC-NEXT: shlq $4, %rax -; GENERIC-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx -; GENERIC-NEXT: fldt (%rax,%rcx) +; GENERIC-NEXT: cmovsq %rax, %rcx +; GENERIC-NEXT: fldt (%rcx) ; GENERIC-NEXT: retq ; ; ATOM-LABEL: test7: ; ATOM: ## %bb.0: -; ATOM-NEXT: xorl %eax, %eax -; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx +; ATOM-NEXT: leaq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax ; ATOM-NEXT: testl %edi, %edi -; ATOM-NEXT: setns %al -; ATOM-NEXT: shlq $4, %rax -; ATOM-NEXT: fldt (%rax,%rcx) +; ATOM-NEXT: leaq 16(%rax), %rcx +; ATOM-NEXT: cmovsq %rax, %rcx +; ATOM-NEXT: fldt (%rcx) ; ATOM-NEXT: retq ; ; ATHLON-LABEL: test7: ; ATHLON: ## %bb.0: -; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: notl %eax -; ATHLON-NEXT: shrl $27, %eax -; ATHLON-NEXT: andl $-16, %eax -; ATHLON-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%eax) +; ATHLON-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; ATHLON-NEXT: leal 16(%eax), %ecx +; ATHLON-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; ATHLON-NEXT: cmovsl %eax, %ecx +; ATHLON-NEXT: fldt (%ecx) ; ATHLON-NEXT: retl ; ; MCU-LABEL: test7: ; MCU: # %bb.0: -; MCU-NEXT: notl %eax -; MCU-NEXT: shrl $27, %eax -; MCU-NEXT: andl $-16, %eax -; MCU-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%eax) +; MCU-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %ecx +; MCU-NEXT: testl %eax, %eax +; MCU-NEXT: js .LBB6_2 +; MCU-NEXT: # %bb.1: +; MCU-NEXT: addl $16, %ecx +; MCU-NEXT: .LBB6_2: +; MCU-NEXT: fldt (%ecx) ; MCU-NEXT: retl %tmp9 = icmp sgt i32 %tmp8, -1 %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000 diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -914,8 +914,8 @@ ; X86-NEXT: movl $23, %eax ; X86-NEXT: .LBB39_2: ; X86-NEXT: setne %dl -; X86-NEXT: movzbl %dl, %edx ; X86-NEXT: andl $1, %eax +; X86-NEXT: movzbl %dl, %edx ; X86-NEXT: xorl $1, %esi ; X86-NEXT: xorl $1, %ecx ; X86-NEXT: xorl %ebx, %ebx diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -262,9 +262,12 @@ define i64 @PR40657(i8 %var2, i8 %var9) { ; CHECK-LABEL: PR40657: ; CHECK: # %bb.0: -; CHECK-NEXT: addb %sil, %dil -; CHECK-NEXT: incb %dil -; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: incb %sil +; CHECK-NEXT: leal 1(%rdi), %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: testb $1, %sil +; CHECK-NEXT: cmovel %edi, %eax ; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: retq %var6 = trunc i8 %var9 to i1 diff --git a/llvm/test/CodeGen/X86/uint64-to-float.ll b/llvm/test/CodeGen/X86/uint64-to-float.ll --- a/llvm/test/CodeGen/X86/uint64-to-float.ll +++ b/llvm/test/CodeGen/X86/uint64-to-float.ll @@ -15,12 +15,14 @@ ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 12(%ebp), %eax ; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: shrl $31, %eax +; X86-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; X86-NEXT: leal 4(%eax), %ecx +; X86-NEXT: cmpl $0, 12(%ebp) +; X86-NEXT: cmovnsl %eax, %ecx ; X86-NEXT: fildll {{[0-9]+}}(%esp) -; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; X86-NEXT: fadds (%ecx) ; X86-NEXT: fstps {{[0-9]+}}(%esp) ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: movss %xmm0, (%esp) @@ -51,17 +53,19 @@ ; X86-WIN-NEXT: movl %esp, %ebp ; X86-WIN-NEXT: andl $-8, %esp ; X86-WIN-NEXT: subl $24, %esp -; X86-WIN-NEXT: movl 12(%ebp), %eax ; X86-WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-WIN-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-WIN-NEXT: shrl $31, %eax +; X86-WIN-NEXT: movl $__real@5f80000000000000, %eax +; X86-WIN-NEXT: leal 4(%eax), %ecx +; X86-WIN-NEXT: cmpl $0, 12(%ebp) +; X86-WIN-NEXT: cmovnsl %eax, %ecx ; X86-WIN-NEXT: fildll {{[0-9]+}}(%esp) ; X86-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-WIN-NEXT: orl $768, %ecx # imm = 0x300 -; X86-WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) +; X86-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-WIN-NEXT: orl $768, %eax # imm = 0x300 +; X86-WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-WIN-NEXT: fadds __real@5f80000000000000(,%eax,4) +; X86-WIN-NEXT: fadds (%ecx) ; X86-WIN-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-WIN-NEXT: fstps {{[0-9]+}}(%esp) ; X86-WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -18,18 +18,17 @@ define i64 @ll_a_op_b__2(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_op_b__2: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: movq %rsi, %rcx -; DEFAULT-NEXT: movq %rdi, %rax -; DEFAULT-NEXT: shlq %cl, %rax -; DEFAULT-NEXT: cmpq $-2, %rax -; DEFAULT-NEXT: jle .LBB0_1 -; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rcx, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB0_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rcx, %rax +; DEFAULT-NEXT: movq %rsi, %rax +; DEFAULT-NEXT: movq %rdi, %rdx +; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: shlq %cl, %rdx +; DEFAULT-NEXT: cmpq $-2, %rdx +; DEFAULT-NEXT: jg .LBB0_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $-2, %rdx +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB0_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_op_b__2: @@ -41,9 +40,9 @@ ; EQ2ICMP-NEXT: cmpq $-2, %rdx ; EQ2ICMP-NEXT: jg .LBB0_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $-2, %rdx +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: .LBB0_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -64,19 +63,18 @@ define i64 @ll_a_op_b__1(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_op_b__1: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: movq %rsi, %rcx -; DEFAULT-NEXT: movq %rdi, %rax -; DEFAULT-NEXT: shlq %cl, %rax -; DEFAULT-NEXT: testq %rax, %rax +; DEFAULT-NEXT: movq %rsi, %rax +; DEFAULT-NEXT: movq %rdi, %rdx +; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: shlq %cl, %rdx +; DEFAULT-NEXT: testq %rdx, %rdx ; DEFAULT-NEXT: js .LBB1_1 ; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rcx, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB1_1: # %if.end -; DEFAULT-NEXT: cmpq $-1, %rax -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rcx, %rax ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $-1, %rdx +; DEFAULT-NEXT: cmovneq %rdi, %rax ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_op_b__1: @@ -90,10 +88,9 @@ ; EQ2ICMP-NEXT: # %bb.2: # %return ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB1_1: # %if.end -; EQ2ICMP-NEXT: cmpq $-1, %rdx -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $-1, %rdx +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: retq entry: %shl = shl i64 %a, %b @@ -113,18 +110,18 @@ define i64 @ll_a_op_b_0(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_op_b_0: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: movq %rsi, %rcx -; DEFAULT-NEXT: movq %rdi, %rax -; DEFAULT-NEXT: shlq %cl, %rax -; DEFAULT-NEXT: testq %rax, %rax +; DEFAULT-NEXT: movq %rsi, %rax +; DEFAULT-NEXT: movq %rdi, %rdx +; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: shlq %cl, %rdx +; DEFAULT-NEXT: testq %rdx, %rdx ; DEFAULT-NEXT: jle .LBB2_1 ; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rcx, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB2_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rcx, %rax ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: testq %rdx, %rdx +; DEFAULT-NEXT: cmovneq %rdi, %rax ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_op_b_0: @@ -138,9 +135,9 @@ ; EQ2ICMP-NEXT: # %bb.2: # %return ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB2_1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovsq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: testq %rdx, %rdx +; EQ2ICMP-NEXT: cmovsq %rdi, %rax ; EQ2ICMP-NEXT: retq entry: %shl = shl i64 %a, %b @@ -160,18 +157,17 @@ define i64 @ll_a_op_b_1(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_op_b_1: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: movq %rsi, %rcx -; DEFAULT-NEXT: movq %rdi, %rax -; DEFAULT-NEXT: shlq %cl, %rax -; DEFAULT-NEXT: cmpq $1, %rax -; DEFAULT-NEXT: jle .LBB3_1 -; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rcx, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB3_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rcx, %rax +; DEFAULT-NEXT: movq %rsi, %rax +; DEFAULT-NEXT: movq %rdi, %rdx +; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: shlq %cl, %rdx +; DEFAULT-NEXT: cmpq $1, %rdx +; DEFAULT-NEXT: jg .LBB3_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $1, %rdx +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB3_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_op_b_1: @@ -183,9 +179,9 @@ ; EQ2ICMP-NEXT: cmpq $1, %rdx ; EQ2ICMP-NEXT: jg .LBB3_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: testq %rdx, %rdx +; EQ2ICMP-NEXT: cmovleq %rdi, %rax ; EQ2ICMP-NEXT: .LBB3_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -206,18 +202,17 @@ define i64 @ll_a_op_b_2(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_op_b_2: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: movq %rsi, %rcx -; DEFAULT-NEXT: movq %rdi, %rax -; DEFAULT-NEXT: shlq %cl, %rax -; DEFAULT-NEXT: cmpq $2, %rax -; DEFAULT-NEXT: jle .LBB4_1 -; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rcx, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB4_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rcx, %rax +; DEFAULT-NEXT: movq %rsi, %rax +; DEFAULT-NEXT: movq %rdi, %rdx +; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: shlq %cl, %rdx +; DEFAULT-NEXT: cmpq $2, %rdx +; DEFAULT-NEXT: jg .LBB4_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $2, %rdx +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB4_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_op_b_2: @@ -229,9 +224,9 @@ ; EQ2ICMP-NEXT: cmpq $2, %rdx ; EQ2ICMP-NEXT: jg .LBB4_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $2, %rdx +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: .LBB4_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -252,15 +247,14 @@ define i64 @ll_a__2(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a__2: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: cmpq $-2, %rdi -; DEFAULT-NEXT: jle .LBB5_1 -; DEFAULT-NEXT: # %bb.2: # %return ; DEFAULT-NEXT: movq %rsi, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB5_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rsi, %rax +; DEFAULT-NEXT: cmpq $-2, %rdi +; DEFAULT-NEXT: jg .LBB5_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $-2, %rdi +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB5_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a__2: @@ -269,9 +263,9 @@ ; EQ2ICMP-NEXT: cmpq $-2, %rdi ; EQ2ICMP-NEXT: jg .LBB5_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $-2, %rdi +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: .LBB5_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -291,16 +285,15 @@ define i64 @ll_a__1(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a__1: ; DEFAULT: # %bb.0: # %entry +; DEFAULT-NEXT: movq %rsi, %rax ; DEFAULT-NEXT: testq %rdi, %rdi ; DEFAULT-NEXT: js .LBB6_1 ; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rsi, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB6_1: # %if.end -; DEFAULT-NEXT: cmpq $-1, %rdi -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rsi, %rax ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $-1, %rdi +; DEFAULT-NEXT: cmovneq %rdi, %rax ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a__1: @@ -311,10 +304,9 @@ ; EQ2ICMP-NEXT: # %bb.2: # %return ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB6_1: # %if.end -; EQ2ICMP-NEXT: cmpq $-1, %rdi -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $-1, %rdi +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: retq entry: %cmp = icmp sgt i64 %a, -1 @@ -333,15 +325,15 @@ define i64 @ll_a_0(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_0: ; DEFAULT: # %bb.0: # %entry +; DEFAULT-NEXT: movq %rsi, %rax ; DEFAULT-NEXT: testq %rdi, %rdi ; DEFAULT-NEXT: jle .LBB7_1 ; DEFAULT-NEXT: # %bb.2: # %return -; DEFAULT-NEXT: movq %rsi, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB7_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rsi, %rax ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: testq %rdi, %rdi +; DEFAULT-NEXT: cmovneq %rdi, %rax ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_0: @@ -352,9 +344,9 @@ ; EQ2ICMP-NEXT: # %bb.2: # %return ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB7_1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovsq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: testq %rdi, %rdi +; EQ2ICMP-NEXT: cmovsq %rdi, %rax ; EQ2ICMP-NEXT: retq entry: %cmp = icmp sgt i64 %a, 0 @@ -373,15 +365,14 @@ define i64 @ll_a_1(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_1: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: cmpq $1, %rdi -; DEFAULT-NEXT: jle .LBB8_1 -; DEFAULT-NEXT: # %bb.2: # %return ; DEFAULT-NEXT: movq %rsi, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB8_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rsi, %rax +; DEFAULT-NEXT: cmpq $1, %rdi +; DEFAULT-NEXT: jg .LBB8_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $1, %rdi +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB8_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_1: @@ -390,9 +381,9 @@ ; EQ2ICMP-NEXT: cmpq $1, %rdi ; EQ2ICMP-NEXT: jg .LBB8_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: testq %rdi, %rdi +; EQ2ICMP-NEXT: cmovleq %rdi, %rax ; EQ2ICMP-NEXT: .LBB8_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -412,15 +403,14 @@ define i64 @ll_a_2(i64 %a, i64 %b) { ; DEFAULT-LABEL: ll_a_2: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: cmpq $2, %rdi -; DEFAULT-NEXT: jle .LBB9_1 -; DEFAULT-NEXT: # %bb.2: # %return ; DEFAULT-NEXT: movq %rsi, %rax -; DEFAULT-NEXT: retq -; DEFAULT-NEXT: .LBB9_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmoveq %rsi, %rax +; DEFAULT-NEXT: cmpq $2, %rdi +; DEFAULT-NEXT: jg .LBB9_2 +; DEFAULT-NEXT: # %bb.1: # %if.end ; DEFAULT-NEXT: imulq %rdi, %rax +; DEFAULT-NEXT: cmpq $2, %rdi +; DEFAULT-NEXT: cmovneq %rdi, %rax +; DEFAULT-NEXT: .LBB9_2: # %return ; DEFAULT-NEXT: retq ; ; EQ2ICMP-LABEL: ll_a_2: @@ -429,9 +419,9 @@ ; EQ2ICMP-NEXT: cmpq $2, %rdi ; EQ2ICMP-NEXT: jg .LBB9_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %ecx -; EQ2ICMP-NEXT: cmovlq %rcx, %rax ; EQ2ICMP-NEXT: imulq %rdi, %rax +; EQ2ICMP-NEXT: cmpq $2, %rdi +; EQ2ICMP-NEXT: cmovlq %rdi, %rax ; EQ2ICMP-NEXT: .LBB9_2: # %return ; EQ2ICMP-NEXT: retq entry: @@ -457,10 +447,9 @@ ; DEFAULT-NEXT: cmpl $-2, %eax ; DEFAULT-NEXT: jg .LBB10_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %ecx, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: imull %edi, %ecx +; DEFAULT-NEXT: cmpl $-2, %eax +; DEFAULT-NEXT: cmovnel %edi, %ecx ; DEFAULT-NEXT: .LBB10_2: # %return ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq @@ -473,9 +462,9 @@ ; EQ2ICMP-NEXT: cmpl $-2, %eax ; EQ2ICMP-NEXT: jg .LBB10_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %ecx ; EQ2ICMP-NEXT: imull %edi, %ecx +; EQ2ICMP-NEXT: cmpl $-2, %eax +; EQ2ICMP-NEXT: cmovll %edi, %ecx ; EQ2ICMP-NEXT: .LBB10_2: # %return ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq @@ -508,11 +497,9 @@ ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB11_1: # %if.end +; DEFAULT-NEXT: imull %edi, %ecx ; DEFAULT-NEXT: cmpl $-1, %eax -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %ecx, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: cmovnel %edi, %ecx ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq ; @@ -527,10 +514,9 @@ ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB11_1: # %if.end -; EQ2ICMP-NEXT: cmpl $-1, %eax -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %ecx ; EQ2ICMP-NEXT: imull %edi, %ecx +; EQ2ICMP-NEXT: cmpl $-1, %eax +; EQ2ICMP-NEXT: cmovll %edi, %ecx ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq entry: @@ -562,10 +548,9 @@ ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB12_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %ecx, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: imull %edi, %ecx +; DEFAULT-NEXT: testl %eax, %eax +; DEFAULT-NEXT: cmovnel %edi, %ecx ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq ; @@ -580,9 +565,9 @@ ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB12_1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovsl %eax, %ecx ; EQ2ICMP-NEXT: imull %edi, %ecx +; EQ2ICMP-NEXT: testl %eax, %eax +; EQ2ICMP-NEXT: cmovsl %edi, %ecx ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq entry: @@ -611,10 +596,9 @@ ; DEFAULT-NEXT: cmpl $1, %eax ; DEFAULT-NEXT: jg .LBB13_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %ecx, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: imull %edi, %ecx +; DEFAULT-NEXT: cmpl $1, %eax +; DEFAULT-NEXT: cmovnel %edi, %ecx ; DEFAULT-NEXT: .LBB13_2: # %return ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq @@ -627,9 +611,9 @@ ; EQ2ICMP-NEXT: cmpl $1, %eax ; EQ2ICMP-NEXT: jg .LBB13_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %ecx ; EQ2ICMP-NEXT: imull %edi, %ecx +; EQ2ICMP-NEXT: testl %eax, %eax +; EQ2ICMP-NEXT: cmovlel %edi, %ecx ; EQ2ICMP-NEXT: .LBB13_2: # %return ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq @@ -659,10 +643,9 @@ ; DEFAULT-NEXT: cmpl $2, %eax ; DEFAULT-NEXT: jg .LBB14_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %ecx, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %ecx +; DEFAULT-NEXT: imull %edi, %ecx +; DEFAULT-NEXT: cmpl $2, %eax +; DEFAULT-NEXT: cmovnel %edi, %ecx ; DEFAULT-NEXT: .LBB14_2: # %return ; DEFAULT-NEXT: movslq %ecx, %rax ; DEFAULT-NEXT: retq @@ -675,9 +658,9 @@ ; EQ2ICMP-NEXT: cmpl $2, %eax ; EQ2ICMP-NEXT: jg .LBB14_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %ecx ; EQ2ICMP-NEXT: imull %edi, %ecx +; EQ2ICMP-NEXT: cmpl $2, %eax +; EQ2ICMP-NEXT: cmovll %edi, %ecx ; EQ2ICMP-NEXT: .LBB14_2: # %return ; EQ2ICMP-NEXT: movslq %ecx, %rax ; EQ2ICMP-NEXT: retq @@ -704,10 +687,9 @@ ; DEFAULT-NEXT: cmpl $-2, %edi ; DEFAULT-NEXT: jg .LBB15_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %esi, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %esi +; DEFAULT-NEXT: imull %edi, %esi +; DEFAULT-NEXT: cmpl $-2, %edi +; DEFAULT-NEXT: cmovnel %edi, %esi ; DEFAULT-NEXT: .LBB15_2: # %return ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq @@ -717,9 +699,9 @@ ; EQ2ICMP-NEXT: cmpl $-2, %edi ; EQ2ICMP-NEXT: jg .LBB15_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %esi ; EQ2ICMP-NEXT: imull %edi, %esi +; EQ2ICMP-NEXT: cmpl $-2, %edi +; EQ2ICMP-NEXT: cmovll %edi, %esi ; EQ2ICMP-NEXT: .LBB15_2: # %return ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq @@ -748,11 +730,9 @@ ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB16_1: # %if.end +; DEFAULT-NEXT: imull %edi, %esi ; DEFAULT-NEXT: cmpl $-1, %edi -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %esi, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %esi +; DEFAULT-NEXT: cmovnel %edi, %esi ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq ; @@ -764,10 +744,9 @@ ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB16_1: # %if.end -; EQ2ICMP-NEXT: cmpl $-1, %edi -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %esi ; EQ2ICMP-NEXT: imull %edi, %esi +; EQ2ICMP-NEXT: cmpl $-1, %edi +; EQ2ICMP-NEXT: cmovll %edi, %esi ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq entry: @@ -795,10 +774,9 @@ ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq ; DEFAULT-NEXT: .LBB17_1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %esi, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %esi +; DEFAULT-NEXT: imull %edi, %esi +; DEFAULT-NEXT: testl %edi, %edi +; DEFAULT-NEXT: cmovnel %edi, %esi ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq ; @@ -810,9 +788,9 @@ ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq ; EQ2ICMP-NEXT: .LBB17_1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovsl %eax, %esi ; EQ2ICMP-NEXT: imull %edi, %esi +; EQ2ICMP-NEXT: testl %edi, %edi +; EQ2ICMP-NEXT: cmovsl %edi, %esi ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq entry: @@ -837,10 +815,9 @@ ; DEFAULT-NEXT: cmpl $1, %edi ; DEFAULT-NEXT: jg .LBB18_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %esi, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %esi +; DEFAULT-NEXT: imull %edi, %esi +; DEFAULT-NEXT: cmpl $1, %edi +; DEFAULT-NEXT: cmovnel %edi, %esi ; DEFAULT-NEXT: .LBB18_2: # %return ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq @@ -850,9 +827,9 @@ ; EQ2ICMP-NEXT: cmpl $1, %edi ; EQ2ICMP-NEXT: jg .LBB18_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %esi ; EQ2ICMP-NEXT: imull %edi, %esi +; EQ2ICMP-NEXT: testl %edi, %edi +; EQ2ICMP-NEXT: cmovlel %edi, %esi ; EQ2ICMP-NEXT: .LBB18_2: # %return ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq @@ -878,10 +855,9 @@ ; DEFAULT-NEXT: cmpl $2, %edi ; DEFAULT-NEXT: jg .LBB19_2 ; DEFAULT-NEXT: # %bb.1: # %if.end -; DEFAULT-NEXT: movl $1, %eax -; DEFAULT-NEXT: cmovel %esi, %eax -; DEFAULT-NEXT: imull %edi, %eax -; DEFAULT-NEXT: movl %eax, %esi +; DEFAULT-NEXT: imull %edi, %esi +; DEFAULT-NEXT: cmpl $2, %edi +; DEFAULT-NEXT: cmovnel %edi, %esi ; DEFAULT-NEXT: .LBB19_2: # %return ; DEFAULT-NEXT: movslq %esi, %rax ; DEFAULT-NEXT: retq @@ -891,9 +867,9 @@ ; EQ2ICMP-NEXT: cmpl $2, %edi ; EQ2ICMP-NEXT: jg .LBB19_2 ; EQ2ICMP-NEXT: # %bb.1: # %if.end -; EQ2ICMP-NEXT: movl $1, %eax -; EQ2ICMP-NEXT: cmovll %eax, %esi ; EQ2ICMP-NEXT: imull %edi, %esi +; EQ2ICMP-NEXT: cmpl $2, %edi +; EQ2ICMP-NEXT: cmovll %edi, %esi ; EQ2ICMP-NEXT: .LBB19_2: # %return ; EQ2ICMP-NEXT: movslq %esi, %rax ; EQ2ICMP-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll @@ -274,18 +274,23 @@ ; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp) ; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) +; SSE-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE-32-NEXT: leal 4(%eax), %ecx ; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; SSE-32-NEXT: movd %xmm1, %eax -; SSE-32-NEXT: shrl $31, %eax +; SSE-32-NEXT: movd %xmm1, %edx +; SSE-32-NEXT: testl %edx, %edx +; SSE-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; SSE-32-NEXT: cmovsl %ecx, %edx ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-32-NEXT: fadds (%edx) ; SSE-32-NEXT: fstps (%esp) ; SSE-32-NEXT: wait ; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; SSE-32-NEXT: movd %xmm0, %eax -; SSE-32-NEXT: shrl $31, %eax +; SSE-32-NEXT: movd %xmm0, %edx +; SSE-32-NEXT: testl %edx, %edx +; SSE-32-NEXT: cmovsl %ecx, %eax ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-32-NEXT: fadds (%eax) ; SSE-32-NEXT: fstps {{[0-9]+}}(%esp) ; SSE-32-NEXT: wait ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero @@ -343,18 +348,23 @@ ; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp) ; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) +; SSE41-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE41-32-NEXT: leal 4(%eax), %ecx ; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; SSE41-32-NEXT: movd %xmm1, %eax -; SSE41-32-NEXT: shrl $31, %eax +; SSE41-32-NEXT: movd %xmm1, %edx +; SSE41-32-NEXT: testl %edx, %edx +; SSE41-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; SSE41-32-NEXT: cmovsl %ecx, %edx ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE41-32-NEXT: fadds (%edx) ; SSE41-32-NEXT: fstps (%esp) ; SSE41-32-NEXT: wait ; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; SSE41-32-NEXT: movd %xmm0, %eax -; SSE41-32-NEXT: shrl $31, %eax +; SSE41-32-NEXT: movd %xmm0, %edx +; SSE41-32-NEXT: testl %edx, %edx +; SSE41-32-NEXT: cmovsl %ecx, %eax ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE41-32-NEXT: fadds (%eax) ; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp) ; SSE41-32-NEXT: wait ; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero @@ -412,16 +422,21 @@ ; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) -; AVX-32-NEXT: vextractps $1, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-32-NEXT: leal 4(%eax), %ecx +; AVX-32-NEXT: vextractps $1, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: cmovsl %ecx, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%eax) ; AVX-32-NEXT: fstps (%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -1272,18 +1287,23 @@ ; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp) +; SSE-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE-32-NEXT: leal 4(%eax), %ecx ; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE-32-NEXT: movd %xmm1, %eax -; SSE-32-NEXT: shrl $31, %eax +; SSE-32-NEXT: movd %xmm1, %edx +; SSE-32-NEXT: testl %edx, %edx +; SSE-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; SSE-32-NEXT: cmovsl %ecx, %edx ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-32-NEXT: fadds (%edx) ; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE-32-NEXT: wait ; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; SSE-32-NEXT: movd %xmm0, %eax -; SSE-32-NEXT: shrl $31, %eax +; SSE-32-NEXT: movd %xmm0, %edx +; SSE-32-NEXT: testl %edx, %edx +; SSE-32-NEXT: cmovsl %ecx, %eax ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE-32-NEXT: fadds (%eax) ; SSE-32-NEXT: fstpl (%esp) ; SSE-32-NEXT: wait ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -1340,18 +1360,23 @@ ; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) ; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp) +; SSE41-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE41-32-NEXT: leal 4(%eax), %ecx ; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE41-32-NEXT: movd %xmm1, %eax -; SSE41-32-NEXT: shrl $31, %eax +; SSE41-32-NEXT: movd %xmm1, %edx +; SSE41-32-NEXT: testl %edx, %edx +; SSE41-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; SSE41-32-NEXT: cmovsl %ecx, %edx ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE41-32-NEXT: fadds (%edx) ; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE41-32-NEXT: wait ; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; SSE41-32-NEXT: movd %xmm0, %eax -; SSE41-32-NEXT: shrl $31, %eax +; SSE41-32-NEXT: movd %xmm0, %edx +; SSE41-32-NEXT: testl %edx, %edx +; SSE41-32-NEXT: cmovsl %ecx, %eax ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) -; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; SSE41-32-NEXT: fadds (%eax) ; SSE41-32-NEXT: fstpl (%esp) ; SSE41-32-NEXT: wait ; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -1408,16 +1433,21 @@ ; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) -; AVX-32-NEXT: vextractps $1, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-32-NEXT: leal 4(%eax), %ecx +; AVX-32-NEXT: vextractps $1, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: cmovsl %ecx, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%eax) ; AVX-32-NEXT: fstpl (%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -764,28 +764,37 @@ ; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) ; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3] ; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) -; AVX-32-NEXT: vextractps $1, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-32-NEXT: leal 4(%eax), %ecx +; AVX-32-NEXT: vextractps $1, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstpl (%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $1, %xmm1, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $1, %xmm1, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm1, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm1, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: cmovsl %ecx, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%eax) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -1048,28 +1057,37 @@ ; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) ; AVX-32-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,3,2,3] ; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) -; AVX-32-NEXT: vextractps $1, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; AVX-32-NEXT: leal 4(%eax), %ecx +; AVX-32-NEXT: vextractps $1, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstps (%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm0, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm0, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $1, %xmm1, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $1, %xmm1, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; AVX-32-NEXT: cmovsl %ecx, %edx ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%edx) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait -; AVX-32-NEXT: vextractps $3, %xmm1, %eax -; AVX-32-NEXT: shrl $31, %eax +; AVX-32-NEXT: vextractps $3, %xmm1, %edx +; AVX-32-NEXT: testl %edx, %edx +; AVX-32-NEXT: cmovsl %ecx, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; AVX-32-NEXT: fadds (%eax) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -384,52 +384,69 @@ ; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) ; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3] ; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: vextractps $1, %xmm3, %eax -; NODQ-32-NEXT: shrl $31, %eax -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; NODQ-32-NEXT: leal 4(%eax), %ecx +; NODQ-32-NEXT: vextractps $1, %xmm3, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm3, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm3, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm2, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm2, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm2, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm2, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm0, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm0, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl (%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm0, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm0, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm1, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm1, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm1, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm1, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: cmovsl %ecx, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%eax) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -609,52 +626,69 @@ ; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) ; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3] ; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: vextractps $1, %xmm0, %eax -; NODQ-32-NEXT: shrl $31, %eax -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; NODQ-32-NEXT: leal 4(%eax), %ecx +; NODQ-32-NEXT: vextractps $1, %xmm0, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps (%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm0, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm0, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm3, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm3, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm3, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm3, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm2, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm2, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm2, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm2, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $1, %xmm1, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $1, %xmm1, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %edx +; NODQ-32-NEXT: cmovsl %ecx, %edx ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%edx) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vextractps $3, %xmm1, %eax -; NODQ-32-NEXT: shrl $31, %eax +; NODQ-32-NEXT: vextractps $3, %xmm1, %edx +; NODQ-32-NEXT: testl %edx, %edx +; NODQ-32-NEXT: cmovsl %ecx, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) +; NODQ-32-NEXT: fadds (%eax) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll --- a/llvm/test/CodeGen/X86/vselect-zero.ll +++ b/llvm/test/CodeGen/X86/vselect-zero.ll @@ -170,9 +170,11 @@ define double @fsel_nonzero_constants(double %x, double %y) { ; SSE-LABEL: fsel_nonzero_constants: ; SSE: # %bb.0: -; SSE-NEXT: cmpeqsd %xmm1, %xmm0 -; SSE-NEXT: movq %xmm0, %rax -; SSE-NEXT: andl $1, %eax +; SSE-NEXT: movl ${{\.?LCPI[0-9]+_[0-9]+}}, %eax +; SSE-NEXT: leaq 8(%rax), %rcx +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: cmovneq %rax, %rcx +; SSE-NEXT: cmovpq %rax, %rcx ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll --- a/llvm/test/CodeGen/X86/zext-sext.ll +++ b/llvm/test/CodeGen/X86/zext-sext.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: subq %rax, %rsi ; CHECK-NEXT: movq (%rdx), %rax ; CHECK-NEXT: movswl 8(%rdi), %edx +; CHECK-NEXT: movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B ; CHECK-NEXT: movswl (%rax,%rsi,2), %eax ; CHECK-NEXT: imull %edx, %eax ; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6 @@ -25,18 +26,18 @@ ; CHECK-NEXT: andb %dl, %sil ; CHECK-NEXT: movzbl %sil, %edx ; CHECK-NEXT: movslq %eax, %rsi -; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: subq %rsi, %rax ; CHECK-NEXT: negl %edx -; CHECK-NEXT: subq %rax, %rdi -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: testl $-2, %edx -; CHECK-NEXT: cmovneq %rax, %rdi +; CHECK-NEXT: movabsq $-5089792279245435153, %rdx # imm = 0xB95D6CE38F0CCEEF +; CHECK-NEXT: cmovneq %rdi, %rax ; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: cmovnsq %rax, %rdi -; CHECK-NEXT: movq (%rcx), %rax -; CHECK-NEXT: subq %rdi, %rsi -; CHECK-NEXT: leaq -2138875574(%rax,%rsi), %rax -; CHECK-NEXT: movq %rax, (%rcx) +; CHECK-NEXT: cmovnsq %rdi, %rax +; CHECK-NEXT: addq (%rcx), %rax +; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq entry: %tmp103 = getelementptr inbounds [40 x i16], ptr %a, i64 0, i64 4 diff --git a/llvm/test/tools/llvm-locstats/locstats.ll b/llvm/test/tools/llvm-locstats/locstats.ll --- a/llvm/test/tools/llvm-locstats/locstats.ll +++ b/llvm/test/tools/llvm-locstats/locstats.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; UNSUPPORTED: system-windows ; REQUIRES: x86-registered-target ; RUN: llc %s -o %t0.o -filetype=obj -experimental-debug-variable-locations=true