Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -16018,6 +16018,12 @@
     }
   }
 
+  // Sometimes flags can be set either with an AND or with an SRL/SHL
+  // instruction. The SRL/SHL variant should be preferred for masks longer than
+  // this number of bits.
+  const int ShiftToAndMaxMaskWidth = 32;
+  const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
+
   // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
   // which may be the result of a CAST.  We use the variable 'Op', which is the
   // non-casted variable when we check for possible users.
@@ -16066,7 +16072,7 @@
     // If we have a constant logical shift that's only used in a comparison
     // against zero turn it into an equivalent AND. This allows turning it into
     // a TEST instruction later.
-    if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
+    if (ZeroCheck && Op->hasOneUse() &&
         isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
       EVT VT = Op.getValueType();
       unsigned BitWidth = VT.getSizeInBits();
@@ -16076,7 +16082,7 @@
       APInt Mask = ArithOp.getOpcode() == ISD::SRL
                        ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
                        : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
-      if (!Mask.isSignedIntN(32)) // Avoid large immediates.
+      if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
         break;
       Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
                        DAG.getConstant(Mask, dl, VT));
@@ -16085,18 +16091,59 @@
   case ISD::AND:
     // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
-    // because a TEST instruction will be better.
+    // because a TEST instruction will be better. However, AND should be
+    // preferred if the instruction can be combined into ANDN.
     if (!hasNonFlagsUse(Op)) {
       SDValue Op0 = ArithOp->getOperand(0);
       SDValue Op1 = ArithOp->getOperand(1);
       EVT VT = ArithOp.getValueType();
       bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
       bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
+      bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
+
+      // If we cannot select an ANDN instruction, check if we can replace
+      // AND+IMM64 with a shift before giving up. This is possible for masks
+      // like 0xFF000000 or 0x00FFFFFF, and only if we care about the zero flag.
+      if (!isProperAndn) {
+        if (!ZeroCheck)
+          break;
+
+        assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
+        auto *CN = dyn_cast<ConstantSDNode>(Op1);
+        if (!CN)
+          break;
+
+        const APInt &Mask = CN->getAPIntValue();
+        if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
+          break; // Prefer TEST instruction.
+
+        unsigned BitWidth = Mask.getBitWidth();
+        unsigned LeadingOnes = Mask.countLeadingOnes();
+        unsigned TrailingZeros = Mask.countTrailingZeros();
+
+        if (LeadingOnes + TrailingZeros == BitWidth) {
+          assert(TrailingZeros < VT.getSizeInBits() &&
+                 "Shift amount should be less than the type width");
+          MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+          SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
+          Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
+          break;
+        }
+
+        unsigned LeadingZeros = Mask.countLeadingZeros();
+        unsigned TrailingOnes = Mask.countTrailingOnes();
+
+        if (LeadingZeros + TrailingOnes == BitWidth) {
+          assert(LeadingZeros < VT.getSizeInBits() &&
+                 "Shift amount should be less than the type width");
+          MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+          SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
+          Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
+          break;
+        }
 
-      // But if we can combine this into an ANDN operation, then create an AND
-      // now and allow it to be pattern matched into an ANDN.
-      if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType) break;
+      }
     }
     LLVM_FALLTHROUGH;
   case ISD::SUB:
@@ -16116,7 +16163,7 @@
     case ISD::XOR: Opcode = X86ISD::XOR; break;
     case ISD::AND: Opcode = X86ISD::AND; break;
     case ISD::OR: {
-      if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+      if (!NeedTruncation && ZeroCheck) {
         if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
           return EFLAGS;
       }
Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -9,8 +9,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -32,8 +31,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB1_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
@@ -57,8 +55,7 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT:    testq %rcx, %rax
+; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    je .LBB2_1
 ; CHECK-NEXT:  # BB#2:
 ; CHECK-NEXT:    movq %rdi, %rax
Index: llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ llvm/trunk/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -22,9 +22,8 @@
 define i64 @div64(i64 %a, i64 %b) {
 entry:
 ; CHECK-LABEL: div64:
-; CHECK-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
-; CHECK-DAG: orq %{{.*}}, [[REG:%[a-z]+]]
-; CHECK: testq [[REGMSK]], [[REG]]
+; CHECK: orq %{{.*}}, [[REG:%[a-z]+]]
+; CHECK: shrq $32, [[REG]]
 ; CHECK: divl
 ;
   %div = sdiv i64 %a, %b
Index: llvm/trunk/test/CodeGen/X86/cmp.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/cmp.ll
+++ llvm/trunk/test/CodeGen/X86/cmp.ll
@@ -281,4 +281,54 @@
 ; CHECK: setne
 ; CHECK: testl
 ; CHECK: setne
-}
\ No newline at end of file
+}
+
+define i32 @test21(i64 %val) {
+  %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test21
+; CHECK: shrq $41, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}
+
+; AND-to-SHR transformation is enabled for eq/ne condition codes only.
+define i32 @test22(i64 %val) {
+  %and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+  %cmp = icmp ult i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test22
+; CHECK-NOT: shrq $41
+; CHECK: retq
+}
+
+define i32 @test23(i64 %val) {
+  %and = and i64 %val, -1048576 ; 0xFFFFFFFFFFF00000
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test23
+; CHECK: testq $-1048576, %rdi
+; CHECK: setne %al
+; CHECK: retq
+}
+
+define i32 @test24(i64 %val) {
+  %and = and i64 %val, 281474976710655 ; 0x0000FFFFFFFFFFFF
+  %cmp = icmp ne i64 %and, 0
+  %ret = zext i1 %cmp to i32
+  ret i32 %ret
+
+; CHECK-LABEL: test24
+; CHECK: shlq $16, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}
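
Note (not part of the patch): the following is a minimal stand-alone C++20 sketch of the mask-shape test that the new ISD::AND path performs, for readers following the logic. The function name testMask is hypothetical, the imm32 check stands in for APInt::isSignedIntN(32), and the <bit> calls mirror the APInt countLeading/Trailing helpers; a 64-bit value is assumed, as in the cmp.ll tests.

#include <bit>
#include <cstdint>

// Evaluate (X & Mask) != 0 without materializing a 64-bit immediate,
// provided Mask is a contiguous run of ones anchored at either end of
// the word. Illustrative stand-in for the DAG transform above.
bool testMask(uint64_t X, uint64_t Mask) {
  constexpr unsigned BitWidth = 64;

  // Masks that fit a sign-extended imm32 are already served by one TEST
  // (this is the Mask.isSignedIntN(ShiftToAndMaxMaskWidth) early exit).
  if (static_cast<uint64_t>(
          static_cast<int64_t>(static_cast<int32_t>(Mask))) == Mask)
    return (X & Mask) != 0;

  unsigned LeadingOnes = std::countl_one(Mask);
  unsigned TrailingZeros = std::countr_zero(Mask);
  if (LeadingOnes + TrailingZeros == BitWidth)  // e.g. 0xFFFFFE0000000000
    return (X >> TrailingZeros) != 0;           // SRL replaces AND+TEST

  unsigned LeadingZeros = std::countl_zero(Mask);
  unsigned TrailingOnes = std::countr_one(Mask);
  if (LeadingZeros + TrailingOnes == BitWidth)  // e.g. 0x0000FFFFFFFFFFFF
    return (X << LeadingZeros) != 0;            // SHL replaces AND+TEST

  return (X & Mask) != 0;                       // non-contiguous: keep TEST
}

For example, testMask(v, 0xFFFFFE0000000000) reduces to (v >> 41) != 0, matching the shrq $41 expected in test21, and testMask(v, 0x0000FFFFFFFFFFFF) reduces to (v << 16) != 0, matching the shlq $16 in test24; the imm32 early exit is why test23's mask still lowers to a plain testq.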