Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -15790,6 +15790,12 @@ } } + // Sometimes flags can be set either with an AND or with an SRL/SHL + // instruction. SRL/SHL variant should be preferred for masks longer than this + // number of bits. + const int SHIFT_TO_AND_MAX_MASK_WIDTH = 32; + const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE); + // NOTICE: In the code below we use ArithOp to hold the arithmetic operation // which may be the result of a CAST. We use the variable 'Op', which is the // non-casted variable when we check for possible users. @@ -15838,7 +15844,7 @@ // If we have a constant logical shift that's only used in a comparison // against zero turn it into an equivalent AND. This allows turning it into // a TEST instruction later. - if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() && + if (ZeroCheck && Op->hasOneUse() && isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -15848,7 +15854,7 @@ APInt Mask = ArithOp.getOpcode() == ISD::SRL ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt) : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt); - if (!Mask.isSignedIntN(32)) // Avoid large immediates. + if (!Mask.isSignedIntN(SHIFT_TO_AND_MAX_MASK_WIDTH)) break; Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0), DAG.getConstant(Mask, dl, VT)); @@ -15857,18 +15863,52 @@ case ISD::AND: // If the primary 'and' result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. + // because a TEST instruction will be better. However, AND should be + // preferred if the instruction can be combined into ANDN. 
if (!hasNonFlagsUse(Op)) { SDValue Op0 = ArithOp->getOperand(0); SDValue Op1 = ArithOp->getOperand(1); EVT VT = ArithOp.getValueType(); bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1); bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64; + bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI(); + + // If we cannot select an ANDN instruction, check if we can replace + // AND+IMM64 with a shift before giving up. This is possible for masks + // like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag. + if (!isProperAndn) { + if (!ZeroCheck) + break; + + assert(!dyn_cast<ConstantSDNode>(Op0) && "AND node isn't canonicalized"); + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); + if (!CN) + break; + + const APInt &Mask = CN->getAPIntValue(); + if (Mask.isSignedIntN(SHIFT_TO_AND_MAX_MASK_WIDTH)) + break; // Prefer TEST instruction. + + if (Mask.countLeadingOnes() + Mask.countTrailingZeros() == + Mask.getBitWidth()) { + assert(Mask.countTrailingZeros() < VT.getSizeInBits() && + "Shift amount should be less than the type width"); + SDValue ShAmt = DAG.getConstant(Mask.countTrailingZeros(), dl, VT); + Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt); + break; + } + + if (Mask.countLeadingZeros() + Mask.countTrailingOnes() == + Mask.getBitWidth()) { + assert(Mask.countLeadingZeros() < VT.getSizeInBits() && + "Shift amount should be less than the type width"); + SDValue ShAmt = DAG.getConstant(Mask.countLeadingZeros(), dl, VT); + Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt); + break; + } - // But if we can combine this into an ANDN operation, then create an AND - // now and allow it to be pattern matched into an ANDN. 
- if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType) break; + } } LLVM_FALLTHROUGH; case ISD::SUB: @@ -15888,7 +15928,7 @@ case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; case ISD::OR: { - if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { + if (!NeedTruncation && ZeroCheck) { if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG)) return EFLAGS; } Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -8,9 +8,8 @@ define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: Test_get_quotient: ; CHECK: movq %rdi, %rax -; CHECK-DAG: movabsq $-4294967296, %rcx -; CHECK-DAG: orq %rsi, %rax -; CHECK: testq %rcx, %rax +; CHECK: orq %rsi, %rax +; CHECK: shrq $32, %rax ; CHECK: je ; CHECK: idivq ; CHECK: ret @@ -23,9 +22,8 @@ define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: Test_get_remainder: ; CHECK: movq %rdi, %rax -; CHECK-DAG: movabsq $-4294967296, %rcx -; CHECK-DAG: orq %rsi, %rax -; CHECK: testq %rcx, %rax +; CHECK: orq %rsi, %rax +; CHECK: shrq $32, %rax ; CHECK: je ; CHECK: idivq ; CHECK: ret @@ -38,9 +36,8 @@ define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: Test_get_quotient_and_remainder: ; CHECK: movq %rdi, %rax -; CHECK-DAG: movabsq $-4294967296, %rcx -; CHECK-DAG: orq %rsi, %rax -; CHECK: testq %rcx, %rax +; CHECK: orq %rsi, %rax +; CHECK: shrq $32, %rax ; CHECK: je ; CHECK: idivq ; CHECK-NEXT: addq Index: test/CodeGen/X86/bypass-slow-division-tune.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-tune.ll +++ test/CodeGen/X86/bypass-slow-division-tune.ll @@ -22,9 +22,8 @@ define i64 @div64(i64 %a, i64 %b) { entry: ; CHECK-LABEL: div64: -; CHECK-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]] 
-; CHECK-DAG: orq %{{.*}}, [[REG:%[a-z]+]] -; CHECK: testq [[REGMSK]], [[REG]] +; CHECK: orq %{{.*}}, [[REG:%[a-z]+]] +; CHECK: shrq $32, [[REG]] ; CHECK: divl ; %div = sdiv i64 %a, %b Index: test/CodeGen/X86/cmp.ll =================================================================== --- test/CodeGen/X86/cmp.ll +++ test/CodeGen/X86/cmp.ll @@ -281,4 +281,54 @@ ; CHECK: setne ; CHECK: testl ; CHECK: setne +} + +define i32 @test21(i64 %val) { + %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000 + %cmp = icmp ne i64 %and, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret + +; CHECK-LABEL: test21 +; CHECK: shrq $41, %rdi +; CHECK-NOT: test +; CHECK: setne %al +; CHECK: retq +} + +; AND-to-SHR transformation is enabled for eq/ne condition codes only. +define i32 @test22(i64 %val) { + %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000 + %cmp = icmp ult i64 %and, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret + +; CHECK-LABEL: test22 +; CHECK-NOT: shrq $41 +; CHECK: retq +} + +define i32 @test23(i64 %val) { + %and = and i64 %val, -1048576 ; 0xFFFFFFFFFFF00000 + %cmp = icmp ne i64 %and, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret + +; CHECK-LABEL: test23 +; CHECK: testq $-1048576, %rdi +; CHECK: setne %al +; CHECK: retq +} + +define i32 @test24(i64 %val) { + %and = and i64 %val, 281474976710655 ; 0x0000FFFFFFFFFFFF + %cmp = icmp ne i64 %and, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret + +; CHECK-LABEL: test24 +; CHECK: shlq $16, %rdi +; CHECK-NOT: test +; CHECK: setne %al +; CHECK: retq } \ No newline at end of file