Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1468,6 +1468,10 @@ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); + if (MinBits == 1 && C1 == 1) + // Invert the condition. + return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -15010,9 +15010,32 @@ return 2; } +/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition +/// according to equal/not-equal condition code \p CC. +static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG) { + // If Src is i8, promote it to i32 with any_extend. There is no i8 BT + // instruction. Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i32 value is ok. We extend to i32 because + // the encoding for the i16 version is larger than the i32 version. + // Also promote i16 to i32 for performance / code size reason. + if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16) + Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src); + + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (Src.getValueType() != BitNo.getValueType()) + BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); + + SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); + X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(Cond, dl, MVT::i8), BT); +} + /// Result of 'and' is compared against zero. Change to a BT node if possible. -SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG) const { +static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG) { SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -15055,30 +15078,38 @@ } } - if (LHS.getNode()) { - // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i32 value is ok. We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - // Also promote i16 to i32 for performance / code size reason. - if (LHS.getValueType() == MVT::i8 || - LHS.getValueType() == MVT::i16) - LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (LHS.getValueType() != RHS.getValueType()) - RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); - - SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); - X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, dl, MVT::i8), BT); - } + if (LHS.getNode()) + return getBitTestCondition(LHS, RHS, CC, dl, DAG); return SDValue(); } +// Convert (truncate (srl X, N) to i1) to (bt X, N) +static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG) { + + assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 && + "Expected TRUNCATE to i1 node"); + + if (Op.getOperand(0).getOpcode() != ISD::SRL) + return SDValue(); + + SDValue ShiftRight = Op.getOperand(0); + return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1), + CC, dl, DAG); +} + +/// Result of 'and' or 'trunc to i1' is compared against zero. +/// Change to a BT node if possible. +SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG) const { + if (Op.getOpcode() == ISD::AND) + return LowerAndToBT(Op, CC, dl, DAG); + if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1) + return LowerTruncateToBT(Op, CC, dl, DAG); + return SDValue(); +} + /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask /// CMPs. static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, @@ -15606,8 +15637,8 @@ // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - isNullConstant(Op1) && + // Lower (trunc (X >> N) to i1) to BT(X, N). + if (Op0.hasOneUse() && isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) { if (VT == MVT::i1) { @@ -16798,9 +16829,8 @@ // Look pass the truncate if the high bits are known zero. Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG); - // We know the result of AND is compared against zero. Try to match - // it to BT. - if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + // We know the result is compared against zero. Try to match it to BT. + if (Cond.hasOneUse()) { if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); Index: llvm/trunk/test/CodeGen/X86/bt.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bt.ll +++ llvm/trunk/test/CodeGen/X86/bt.ll @@ -1,7 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=PENTIUM4 %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX-512 %s - +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s ; PR3253 ; The register+memory form of the BT instruction should be usable on @@ -21,29 +20,11 @@ ; - The and can be commuted. define void @test2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: test2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB0_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB0_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: test2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: jne .LBB0_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB0_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: test2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB0_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -59,29 +40,11 @@ } define void @test2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: test2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB1_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB1_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: test2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: jne .LBB1_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB1_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: test2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB1_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -97,29 +60,11 @@ } define void @atest2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: atest2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB2_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB2_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: atest2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: jne .LBB2_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB2_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: atest2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB2_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -135,29 +80,11 @@ } define void @atest2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: atest2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB3_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB3_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: atest2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: jne .LBB3_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB3_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: atest2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB3_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -177,12 +104,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB4_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB4_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -202,12 +124,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB5_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB5_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -223,29 +140,11 @@ } define void @testne2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: testne2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB6_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB6_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: testne2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB6_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB6_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: testne2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB6_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -261,29 +160,11 @@ } define void @testne2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: testne2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB7_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB7_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: testne2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB7_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB7_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: testne2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB7_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -299,29 +180,11 @@ } define void @atestne2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: atestne2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB8_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB8_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: atestne2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB8_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB8_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: atestne2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB8_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -337,29 +200,11 @@ } define void @atestne2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: atestne2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB9_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB9_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: atestne2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB9_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB9_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: atestne2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB9_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -379,12 +224,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB10_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB10_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -404,12 +244,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB11_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB11_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -425,29 +260,11 @@ } define void @query2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: query2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB12_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB12_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: query2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB12_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB12_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: query2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB12_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -463,29 +280,11 @@ } define void @query2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: query2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB13_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB13_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: query2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB13_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB13_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: query2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB13_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -501,29 +300,11 @@ } define void @aquery2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: aquery2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB14_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB14_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: aquery2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB14_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB14_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: aquery2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB14_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -539,29 +320,11 @@ } define void @aquery2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: aquery2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jae .LBB15_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB15_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: aquery2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: testb $1, %dil -; AVX-512-NEXT: je .LBB15_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB15_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: aquery2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jae .LBB15_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -581,12 +344,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB16_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB16_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -606,12 +364,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB17_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB17_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -631,12 +384,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB18_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB18_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -656,12 +404,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jae .LBB19_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB19_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -677,35 +420,11 @@ } define void @queryne2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: queryne2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB20_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB20_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: queryne2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: andl $1, %edi -; AVX-512-NEXT: kmovw %edi, %k0 -; AVX-512-NEXT: kxnorw %k0, %k0, %k1 -; AVX-512-NEXT: kshiftrw $15, %k1, %k1 -; AVX-512-NEXT: kxorw %k1, %k0, %k0 -; AVX-512-NEXT: kmovw %k0, %eax -; AVX-512-NEXT: testb %al, %al -; AVX-512-NEXT: je .LBB20_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB20_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: queryne2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB20_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -721,35 +440,11 @@ } define void @queryne2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: queryne2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB21_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB21_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: queryne2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: andl $1, %edi -; AVX-512-NEXT: kmovw %edi, %k0 -; AVX-512-NEXT: kxnorw %k0, %k0, %k1 -; AVX-512-NEXT: kshiftrw $15, %k1, %k1 -; AVX-512-NEXT: kxorw %k1, %k0, %k0 -; AVX-512-NEXT: kmovw %k0, %eax -; AVX-512-NEXT: testb %al, %al -; AVX-512-NEXT: je .LBB21_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB21_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: queryne2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB21_2 +; entry: %tmp29 = lshr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -765,35 +460,11 @@ } define void @aqueryne2(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: aqueryne2: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB22_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB22_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: aqueryne2: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: andl $1, %edi -; AVX-512-NEXT: kmovw %edi, %k0 -; AVX-512-NEXT: kxnorw %k0, %k0, %k1 -; AVX-512-NEXT: kshiftrw $15, %k1, %k1 -; AVX-512-NEXT: kxorw %k1, %k0, %k0 -; AVX-512-NEXT: kmovw %k0, %eax -; AVX-512-NEXT: testb %al, %al -; AVX-512-NEXT: je .LBB22_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB22_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: aqueryne2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB22_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 %tmp29, 1 @@ -809,35 +480,11 @@ } define void @aqueryne2b(i32 %x, i32 %n) nounwind { -; PENTIUM4-LABEL: aqueryne2b: -; PENTIUM4: # BB#0: # %entry -; PENTIUM4-NEXT: btl %esi, %edi -; PENTIUM4-NEXT: jb .LBB23_2 -; PENTIUM4-NEXT: # BB#1: # %bb -; PENTIUM4-NEXT: pushq %rax -; PENTIUM4-NEXT: callq foo -; PENTIUM4-NEXT: popq %rax -; PENTIUM4-NEXT: .LBB23_2: # %UnifiedReturnBlock -; PENTIUM4-NEXT: retq -; -; AVX-512-LABEL: aqueryne2b: -; AVX-512: # BB#0: # %entry -; AVX-512-NEXT: movl %esi, %ecx -; AVX-512-NEXT: shrl %cl, %edi -; AVX-512-NEXT: andl $1, %edi -; AVX-512-NEXT: kmovw %edi, %k0 -; AVX-512-NEXT: kxnorw %k0, %k0, %k1 -; AVX-512-NEXT: kshiftrw $15, %k1, %k1 -; AVX-512-NEXT: kxorw %k1, %k0, %k0 -; AVX-512-NEXT: kmovw %k0, %eax -; AVX-512-NEXT: testb %al, %al -; AVX-512-NEXT: je .LBB23_2 -; AVX-512-NEXT: # BB#1: # %bb -; AVX-512-NEXT: pushq %rax -; AVX-512-NEXT: callq foo -; AVX-512-NEXT: popq %rax -; AVX-512-NEXT: .LBB23_2: # %UnifiedReturnBlock -; AVX-512-NEXT: retq +; CHECK-LABEL: aqueryne2b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: btl %esi, %edi +; CHECK-NEXT: jb .LBB23_2 +; entry: %tmp29 = ashr i32 %x, %n %tmp3 = and i32 1, %tmp29 @@ -857,12 +504,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB24_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB24_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -882,12 +524,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB25_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB25_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -907,12 +544,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB26_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB26_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %tmp29, %x @@ -932,12 +564,7 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: jb .LBB27_2 -; CHECK-NEXT: # BB#1: # %bb -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: callq foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .LBB27_2: # %UnifiedReturnBlock -; CHECK-NEXT: retq +; entry: %tmp29 = shl i32 1, %n %tmp3 = and i32 %x, %tmp29 @@ -961,6 +588,7 @@ ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: setb %al ; CHECK-NEXT: retq +; %neg = xor i32 %flags, -1 %shl = shl i32 1, %flag %and = and i32 %shl, %neg