Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -366,6 +366,11 @@
     return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
   }
 
+  /// Return a target constant with the specified value, of type i64.
+  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
+    return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
+  }
+
   /// Return an SDNode that returns the value of the global base register.
   /// Output instructions required to initialize the global base register,
   /// if necessary.
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1435,6 +1435,36 @@
 } // AddedComplexity = 1
 
+// Try to use BTS/BTR/BTC for single-bit operations on the upper 32 bits.
+
+def BTRXForm : SDNodeXForm<imm, [{
+  // Transformation function: Find the lowest 0.
+  return getI64Imm((uint8_t)N->getAPIntValue().countTrailingOnes(), SDLoc(N));
+}]>;
+
+def BTCBTSXForm : SDNodeXForm<imm, [{
+  // Transformation function: Find the lowest 1.
+  return getI64Imm((uint8_t)N->getAPIntValue().countTrailingZeros(), SDLoc(N));
+}]>;
+
+def BTRMask64 : ImmLeaf<i64, [{
+  return !isUInt<32>(Imm) && !isInt<32>(Imm) && isPowerOf2_64(~Imm);
+}]>;
+
+def BTCBTSMask64 : ImmLeaf<i64, [{
+  return !isInt<32>(Imm) && isPowerOf2_64(Imm);
+}]>;
+
+let AddedComplexity = 1 in {
+  def : Pat<(and GR64:$src1, BTRMask64:$mask),
+            (BTR64ri8 GR64:$src1, (BTRXForm imm:$mask))>;
+  def : Pat<(or GR64:$src1, BTCBTSMask64:$mask),
+            (BTS64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
+  def : Pat<(xor GR64:$src1, BTCBTSMask64:$mask),
+            (BTC64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
+}
+
+
 // sext_inreg patterns
 def : Pat<(sext_inreg GR32:$src, i16),
           (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -2361,7 +2361,7 @@
 }]>;
 
 def AndMask64 : ImmLeaf<i64, [{
-  return isMask_64(Imm) && Imm > UINT32_MAX;
+  return isMask_64(Imm) && !isUInt<32>(Imm);
 }]>;
 
 // Use BEXTR for 64-bit 'and' with large immediate 'mask'.
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2079,13 +2079,27 @@
     }
     ImmIdx = 1;
     break;
+  case Instruction::Or:
+  case Instruction::Xor:
+    // We can handle Or/Xor with a power-of-2 immediate using BTS/BTC. The
+    // default path expects bit 31 to be sign extended, so check for it here.
+    if (Idx == 1 && Imm.getBitWidth() == 64 &&
+        isPowerOf2_64(Imm.getZExtValue()))
+      return TTI::TCC_Free;
+    ImmIdx = 1;
+    break;
   case Instruction::And:
     // We support 64-bit ANDs with immediates with 32-bits of leading zeroes
     // by using a 32-bit operation with implicit zero extension. Detect such
     // immediates here as the normal path expects bit 31 to be sign extended.
-    if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
-      return TTI::TCC_Free;
-    LLVM_FALLTHROUGH;
+    // We can also handle clearing a single bit using the BTR instruction.
+    if (Idx == 1 && Imm.getBitWidth() == 64) {
+      uint64_t ImmVal = Imm.getZExtValue();
+      if (isUInt<32>(ImmVal) || isPowerOf2_64(~ImmVal))
+        return TTI::TCC_Free;
+    }
+    ImmIdx = 1;
+    break;
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Mul:
@@ -2093,8 +2107,6 @@
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-  case Instruction::Or:
-  case Instruction::Xor:
     ImmIdx = 1;
     break;
   // Always return TCC_Free for the shift value of a shift instruction.
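For reviewers, a quick illustration of how the patterns above recover the BT-style bit index from a logic-op immediate. This is a minimal standalone C++ sketch; the helper names isPow2/btrIndex/btcBtsIndex are illustrative only, while in-tree the same work is done by isPowerOf2_64, countTrailingOnes and countTrailingZeros from llvm/Support/MathExtras.h:

  #include <cassert>
  #include <cstdint>

  // A power of 2 has exactly one set bit.
  static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

  // BTRMask64 case: an AND mask that is all-ones except one clear bit. The
  // BTR immediate is that bit's index, i.e. the number of trailing ones.
  static unsigned btrIndex(uint64_t Mask) {
    assert(isPow2(~Mask));
    unsigned Idx = 0;
    while (Mask & 1) { Mask >>= 1; ++Idx; } // countTrailingOnes
    return Idx;
  }

  // BTCBTSMask64 case: an OR/XOR mask with a single set bit. The BTS/BTC
  // immediate is the number of trailing zeros.
  static unsigned btcBtsIndex(uint64_t Mask) {
    assert(isPow2(Mask));
    unsigned Idx = 0;
    while (!(Mask & 1)) { Mask >>= 1; ++Idx; } // countTrailingZeros
    return Idx;
  }

  int main() {
    // and i64 %x, 0xFFFFFFFF7FFFFFFF clears bit 31 -> btrq $31
    assert(btrIndex(0xFFFFFFFF7FFFFFFFULL) == 31);
    // or/xor i64 %x, 0x8000000000000000 touches bit 63 -> btsq/btcq $63
    assert(btcBtsIndex(0x8000000000000000ULL) == 63);
    return 0;
  }

The ImmLeaf predicates deliberately reject immediates that fit in a sign-extended 32-bit field, since those are already handled by the cheaper ri32 forms; the test updates below show the remaining movabsq+xorq sequences collapsing to a single btcq.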
Index: test/CodeGen/X86/half.ll
===================================================================
--- test/CodeGen/X86/half.ll
+++ test/CodeGen/X86/half.ll
@@ -274,9 +274,8 @@
 ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2
 ; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2
-; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rax
-; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; CHECK-LIBCALL-NEXT: xorq %rax, %rcx
+; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rcx
+; CHECK-LIBCALL-NEXT: btcq $63, %rcx
 ; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
 ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
 ; CHECK-LIBCALL-NEXT: cmovaeq %rcx, %rax
@@ -290,9 +289,8 @@
 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; BWON-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2
-; BWON-F16C-NEXT: vcvttss2si %xmm2, %rax
-; BWON-F16C-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; BWON-F16C-NEXT: xorq %rax, %rcx
+; BWON-F16C-NEXT: vcvttss2si %xmm2, %rcx
+; BWON-F16C-NEXT: btcq $63, %rcx
 ; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
 ; BWON-F16C-NEXT: cmovaeq %rcx, %rax
Index: test/CodeGen/X86/vec_fp_to_int.ll
===================================================================
--- test/CodeGen/X86/vec_fp_to_int.ll
+++ test/CodeGen/X86/vec_fp_to_int.ll
@@ -258,17 +258,16 @@
 ; SSE-NEXT: movapd %xmm0, %xmm1
 ; SSE-NEXT: subsd %xmm2, %xmm1
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -282,16 +281,15 @@
 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttsd2si %xmm0, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rdx
-; VEX-NEXT: vmovq %rdx, %xmm2
+; VEX-NEXT: cmovaeq %rax, %rcx
+; VEX-NEXT: vmovq %rcx, %xmm2
 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
 ; VEX-NEXT: vcvttsd2si %xmm3, %rax
-; VEX-NEXT: xorq %rcx, %rax
+; VEX-NEXT: btcq $63, %rax
 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
 ; VEX-NEXT: cmovaeq %rax, %rcx
@@ -342,17 +340,16 @@
 ; SSE-NEXT: movapd %xmm0, %xmm1
 ; SSE-NEXT: subsd %xmm2, %xmm1
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -368,16 +365,15 @@
 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttsd2si %xmm0, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rdx
-; VEX-NEXT: vmovq %rdx, %xmm2
+; VEX-NEXT: cmovaeq %rax, %rcx
+; VEX-NEXT: vmovq %rcx, %xmm2
 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
 ; VEX-NEXT: vcvttsd2si %xmm3, %rax
-; VEX-NEXT: xorq %rcx, %rax
+; VEX-NEXT: btcq $63, %rax
 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
 ; VEX-NEXT: cmovaeq %rax, %rcx
@@ -423,17 +419,16 @@
 ; SSE-NEXT: movapd %xmm0, %xmm2
 ; SSE-NEXT: subsd %xmm1, %xmm2
 ; SSE-NEXT: cvttsd2si %xmm2, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm2
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm2
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm1, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm1, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -447,16 +442,15 @@
 ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
 ; VEX-NEXT: vcvttsd2si %xmm2, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttsd2si %xmm0, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rdx
-; VEX-NEXT: vmovq %rdx, %xmm2
+; VEX-NEXT: cmovaeq %rax, %rcx
+; VEX-NEXT: vmovq %rcx, %xmm2
 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
 ; VEX-NEXT: vcvttsd2si %xmm3, %rax
-; VEX-NEXT: xorq %rcx, %rax
+; VEX-NEXT: btcq $63, %rax
 ; VEX-NEXT: vcvttsd2si %xmm0, %rcx
 ; VEX-NEXT: vucomisd %xmm1, %xmm0
 ; VEX-NEXT: cmovaeq %rax, %rcx
@@ -502,24 +496,24 @@
 ; SSE-NEXT: movapd %xmm0, %xmm1
 ; SSE-NEXT: subsd %xmm2, %xmm1
 ; SSE-NEXT: cvttsd2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
 ; SSE-NEXT: cvttsd2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: movq %rax, %rcx
+; SSE-NEXT: btcq $63, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
 ; SSE-NEXT: cmovbq %rax, %rcx
 ; SSE-NEXT: movq %rcx, %xmm0
@@ -580,40 +574,39 @@
 ; SSE-NEXT: movapd %xmm0, %xmm2
 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
 ; SSE-NEXT: subsd %xmm3, %xmm0
-; SSE-NEXT: cvttsd2si %xmm0, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm2, %rdx
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm2, %rcx
 ; SSE-NEXT: ucomisd %xmm3, %xmm2
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm0
 ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: movaps %xmm2, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
-; SSE-NEXT: cvttsd2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm2, %rdx
+; SSE-NEXT: cvttsd2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm2, %rcx
 ; SSE-NEXT: ucomisd %xmm3, %xmm2
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm2
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm2
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT: movapd %xmm1, %xmm2
 ; SSE-NEXT: subsd %xmm3, %xmm2
-; SSE-NEXT: cvttsd2si %xmm2, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: cvttsd2si %xmm2, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE-NEXT: ucomisd %xmm3, %xmm1
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm2
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm2
 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: movaps %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm3, %xmm4
-; SSE-NEXT: cvttsd2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: cvttsd2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE-NEXT: ucomisd %xmm3, %xmm1
-; SSE-NEXT: cmovaeq %rcx, %rax
-; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE-NEXT: movdqa %xmm2, %xmm1
 ; SSE-NEXT: retq
@@ -624,32 +617,31 @@
 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
-; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
 ; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4
 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
 ; AVX1-NEXT: vucomisd %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm2
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3
 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4
 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: btcq $63, %rax
 ; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
 ; AVX1-NEXT: vucomisd %xmm1, %xmm0
 ; AVX1-NEXT: cmovaeq %rax, %rcx
@@ -664,32 +656,31 @@
 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3
 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
-; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
 ; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4
 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttsd2si %xmm2, %rcx
 ; AVX2-NEXT: vucomisd %xmm1, %xmm2
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm2
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm2
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3
 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4
 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
+; AVX2-NEXT: btcq $63, %rax
 ; AVX2-NEXT: vcvttsd2si %xmm0, %rcx
 ; AVX2-NEXT: vucomisd %xmm1, %xmm0
 ; AVX2-NEXT: cmovaeq %rax, %rcx
@@ -755,40 +746,39 @@
 ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
 ; SSE-NEXT: movapd %xmm1, %xmm3
 ; SSE-NEXT: subsd %xmm2, %xmm3
-; SSE-NEXT: cvttsd2si %xmm3, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: cvttsd2si %xmm3, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm1
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm3
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm3
 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: movaps %xmm1, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
-; SSE-NEXT: cvttsd2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: cvttsd2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm1, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm1
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
 ; SSE-NEXT: movapd %xmm0, %xmm1
 ; SSE-NEXT: subsd %xmm2, %xmm1
-; SSE-NEXT: cvttsd2si %xmm1, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm4
 ; SSE-NEXT: subsd %xmm2, %xmm4
-; SSE-NEXT: cvttsd2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: cvttsd2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
 ; SSE-NEXT: ucomisd %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm0
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
 ; SSE-NEXT: movaps %xmm1, %xmm0
@@ -1240,17 +1230,16 @@
 ; SSE-NEXT: movaps %xmm0, %xmm1
 ; SSE-NEXT: subss %xmm2, %xmm1
 ; SSE-NEXT: cvttss2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subss %xmm2, %xmm3
 ; SSE-NEXT: cvttss2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -1264,16 +1253,15 @@
 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
 ; VEX-NEXT: vcvttss2si %xmm2, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttss2si %xmm0, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttss2si %xmm0, %rcx
 ; VEX-NEXT: vucomiss %xmm1, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rdx
-; VEX-NEXT: vmovq %rdx, %xmm2
+; VEX-NEXT: cmovaeq %rax, %rcx
+; VEX-NEXT: vmovq %rcx, %xmm2
 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; VEX-NEXT: vcvttss2si %xmm3, %rax
-; VEX-NEXT: xorq %rcx, %rax
+; VEX-NEXT: btcq $63, %rax
 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
 ; VEX-NEXT: vucomiss %xmm1, %xmm0
 ; VEX-NEXT: cmovaeq %rax, %rcx
@@ -1385,17 +1373,16 @@
 ; SSE-NEXT: movaps %xmm0, %xmm1
 ; SSE-NEXT: subss %xmm2, %xmm1
 ; SSE-NEXT: cvttss2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subss %xmm2, %xmm3
 ; SSE-NEXT: cvttss2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -1409,16 +1396,15 @@
 ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
 ; VEX-NEXT: vcvttss2si %xmm2, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttss2si %xmm0, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttss2si %xmm0, %rcx
 ; VEX-NEXT: vucomiss %xmm1, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rdx
-; VEX-NEXT: vmovq %rdx, %xmm2
+; VEX-NEXT: cmovaeq %rax, %rcx
+; VEX-NEXT: vmovq %rcx, %xmm2
 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; VEX-NEXT: vcvttss2si %xmm3, %rax
-; VEX-NEXT: xorq %rcx, %rax
+; VEX-NEXT: btcq $63, %rax
 ; VEX-NEXT: vcvttss2si %xmm0, %rcx
 ; VEX-NEXT: vucomiss %xmm1, %xmm0
 ; VEX-NEXT: cmovaeq %rax, %rcx
@@ -1472,17 +1458,16 @@
 ; SSE-NEXT: movaps %xmm0, %xmm1
 ; SSE-NEXT: subss %xmm2, %xmm1
 ; SSE-NEXT: cvttss2si %xmm1, %rax
-; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rcx, %rax
-; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
-; SSE-NEXT: cmovaeq %rax, %rdx
-; SSE-NEXT: movq %rdx, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: subss %xmm2, %xmm3
 ; SSE-NEXT: cvttss2si %xmm3, %rax
-; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: btcq $63, %rax
 ; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm2, %xmm0
 ; SSE-NEXT: cmovaeq %rax, %rcx
@@ -1497,19 +1482,18 @@
 ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
 ; VEX-NEXT: vcvttss2si %xmm3, %rax
-; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttss2si %xmm1, %rdx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttss2si %xmm1, %rcx
 ; VEX-NEXT: vucomiss %xmm2, %xmm1
-; VEX-NEXT: cmovaeq %rax, %rdx
+; VEX-NEXT: cmovaeq %rax, %rcx
 ; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
 ; VEX-NEXT: vcvttss2si %xmm1, %rax
-; VEX-NEXT: xorq %rcx, %rax
-; VEX-NEXT: vcvttss2si %xmm0, %rcx
+; VEX-NEXT: btcq $63, %rax
+; VEX-NEXT: vcvttss2si %xmm0, %rdx
 ; VEX-NEXT: vucomiss %xmm2, %xmm0
-; VEX-NEXT: cmovaeq %rax, %rcx
-; VEX-NEXT: vmovq %rcx, %xmm0
-; VEX-NEXT: vmovq %rdx, %xmm1
+; VEX-NEXT: cmovaeq %rax, %rdx
+; VEX-NEXT: vmovq %rdx, %xmm0
+; VEX-NEXT: vmovq %rcx, %xmm1
 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT: retq
 ;
@@ -1679,43 +1663,42 @@
 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movaps %xmm0, %xmm2
 ; SSE-NEXT: subss %xmm1, %xmm2
-; SSE-NEXT: cvttss2si %xmm2, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm2
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm2
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
 ; SSE-NEXT: movaps %xmm3, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm3, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm3
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm3
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
 ; SSE-NEXT: movaps %xmm3, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm3, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm3
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm3
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rax
-; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT: movdqa %xmm2, %xmm0
 ; SSE-NEXT: retq
@@ -1726,32 +1709,31 @@
 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
-; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm2, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm2, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm2
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm3, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm3, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm3
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm0, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: btcq $63, %rax
 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
 ; AVX1-NEXT: cmovaeq %rax, %rcx
@@ -1766,32 +1748,31 @@
 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
-; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm2, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm2, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm2
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm2
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm2
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm3, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm3, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm3
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm0, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
+; AVX2-NEXT: btcq $63, %rax
 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
 ; AVX2-NEXT: cmovaeq %rax, %rcx
@@ -1857,43 +1838,42 @@
 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movaps %xmm0, %xmm2
 ; SSE-NEXT: subss %xmm1, %xmm2
-; SSE-NEXT: cvttss2si %xmm2, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm2
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm2
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
 ; SSE-NEXT: movaps %xmm3, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm3, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm3
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm3
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; SSE-NEXT: movaps %xmm0, %xmm3
 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
 ; SSE-NEXT: movaps %xmm3, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm3, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm3
-; SSE-NEXT: cmovaeq %rcx, %rdx
-; SSE-NEXT: movq %rdx, %xmm3
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm3
 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm4
 ; SSE-NEXT: subss %xmm1, %xmm4
-; SSE-NEXT: cvttss2si %xmm4, %rcx
-; SSE-NEXT: xorq %rax, %rcx
-; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: cvttss2si %xmm4, %rax
+; SSE-NEXT: btcq $63, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
 ; SSE-NEXT: ucomiss %xmm1, %xmm0
-; SSE-NEXT: cmovaeq %rcx, %rax
-; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movq %rcx, %xmm1
 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT: movdqa %xmm2, %xmm0
 ; SSE-NEXT: retq
@@ -1904,32 +1884,31 @@
 ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
-; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm2, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm2, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm2
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
 ; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm3, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm3, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm3
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; AVX1-NEXT: vcvttss2si %xmm3, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttss2si %xmm0, %rdx
+; AVX1-NEXT: btcq $63, %rax
+; AVX1-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
+; AVX1-NEXT: cmovaeq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4
 ; AVX1-NEXT: vcvttss2si %xmm4, %rax
-; AVX1-NEXT: xorq %rcx, %rax
+; AVX1-NEXT: btcq $63, %rax
 ; AVX1-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX1-NEXT: vucomiss %xmm1, %xmm0
 ; AVX1-NEXT: cmovaeq %rax, %rcx
@@ -1944,32 +1923,31 @@
 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
-; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm2, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm2, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm2
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm2
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm2
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm3, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm3, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm3
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
 ; AVX2-NEXT: vcvttss2si %xmm3, %rax
-; AVX2-NEXT: xorq %rcx, %rax
-; AVX2-NEXT: vcvttss2si %xmm0, %rdx
+; AVX2-NEXT: btcq $63, %rax
+; AVX2-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
-; AVX2-NEXT: cmovaeq %rax, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm3
+; AVX2-NEXT: cmovaeq %rax, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm3
 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4
 ; AVX2-NEXT: vcvttss2si %xmm4, %rax
-; AVX2-NEXT: xorq %rcx, %rax
+; AVX2-NEXT: btcq $63, %rax
 ; AVX2-NEXT: vcvttss2si %xmm0, %rcx
 ; AVX2-NEXT: vucomiss %xmm1, %xmm0
 ; AVX2-NEXT: cmovaeq %rax, %rcx
Index: test/CodeGen/X86/x86-64-bittest-logic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/x86-64-bittest-logic.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+define i64 @and1(i64 %x) {
+; CHECK-LABEL: and1:
+; CHECK: # BB#0:
+; CHECK-NEXT: btrq $31, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = and i64 %x, 18446744071562067967 ; clear bit 31
+  ret i64 %a
+}
+
+define i64 @and2(i64 %x) {
+; CHECK-LABEL: and2:
+; CHECK: # BB#0:
+; CHECK-NEXT: btrq $32, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = and i64 %x, 18446744069414584319 ; clear bit 32
+  ret i64 %a
+}
+
+define i64 @and3(i64 %x) {
+; CHECK-LABEL: and3:
+; CHECK: # BB#0:
+; CHECK-NEXT: btrq $62, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = and i64 %x, 13835058055282163711 ; clear bit 62
+  ret i64 %a
+}
+
+define i64 @and4(i64 %x) {
+; CHECK-LABEL: and4:
+; CHECK: # BB#0:
+; CHECK-NEXT: btrq $63, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = and i64 %x, 9223372036854775807 ; clear bit 63
+  ret i64 %a
+}
+
+define i64 @or1(i64 %x) {
+; CHECK-LABEL: or1:
+; CHECK: # BB#0:
+; CHECK-NEXT: btsq $31, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = or i64 %x, 2147483648 ; set bit 31
+  ret i64 %a
+}
+
+define i64 @or2(i64 %x) {
+; CHECK-LABEL: or2:
+; CHECK: # BB#0:
+; CHECK-NEXT: btsq $32, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = or i64 %x, 4294967296 ; set bit 32
+  ret i64 %a
+}
+
+define i64 @or3(i64 %x) {
+; CHECK-LABEL: or3:
+; CHECK: # BB#0:
+; CHECK-NEXT: btsq $62, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = or i64 %x, 4611686018427387904 ; set bit 62
+  ret i64 %a
+}
+
+define i64 @or4(i64 %x) {
+; CHECK-LABEL: or4:
+; CHECK: # BB#0:
+; CHECK-NEXT: btsq $63, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = or i64 %x, 9223372036854775808 ; set bit 63
+  ret i64 %a
+}
+
+define i64 @xor1(i64 %x) {
+; CHECK-LABEL: xor1:
+; CHECK: # BB#0:
+; CHECK-NEXT: btcq $31, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = xor i64 %x, 2147483648 ; toggle bit 31
+  ret i64 %a
+}
+
+define i64 @xor2(i64 %x) {
+; CHECK-LABEL: xor2:
+; CHECK: # BB#0:
+; CHECK-NEXT: btcq $32, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = xor i64 %x, 4294967296 ; toggle bit 32
+  ret i64 %a
+}
+
+define i64 @xor3(i64 %x) {
+; CHECK-LABEL: xor3:
+; CHECK: # BB#0:
+; CHECK-NEXT: btcq $62, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = xor i64 %x, 4611686018427387904 ; toggle bit 62
+  ret i64 %a
+}
+
+define i64 @xor4(i64 %x) {
+; CHECK-LABEL: xor4:
+; CHECK: # BB#0:
+; CHECK-NEXT: btcq $63, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+  %a = xor i64 %x, 9223372036854775808 ; toggle bit 63
+  ret i64 %a
+}
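A quick way to convince yourself the rewrite is sound (illustrative C++, not part of the patch): btsq/btrq/btcq with an immediate bit index compute exactly the or/and/xor-with-constant operations the tests above replace, while using one register instead of two and, if I have the encodings right, roughly 5 bytes versus the 13 of a movabsq of the 64-bit mask plus a 64-bit xorq.

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  // Models of the register results of the x86 bit-test-and-modify forms.
  static uint64_t bts(uint64_t V, unsigned I) { return V | (1ULL << I); }  // set bit I
  static uint64_t btr(uint64_t V, unsigned I) { return V & ~(1ULL << I); } // clear bit I
  static uint64_t btc(uint64_t V, unsigned I) { return V ^ (1ULL << I); }  // toggle bit I

  int main() {
    for (uint64_t X : {0ULL, ~0ULL, 0x123456789ABCDEF0ULL}) {
      assert(btr(X, 63) == (X & 9223372036854775807ULL)); // @and4 above
      assert(bts(X, 62) == (X | 4611686018427387904ULL)); // @or3 above
      assert(btc(X, 32) == (X ^ 4294967296ULL));          // @xor2 above
    }
    return 0;
  }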