Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5375,6 +5375,10 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { assert(N->getOpcode() == ISD::XOR); + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1))) + return SDValue(); + EVT VT = N->getValueType(0); // FIXME @@ -5383,29 +5387,35 @@ // There are 3 commutable operators in the pattern, // so we have to deal with 8 possible variants of the basic pattern. - SDValue X, Y, M; - auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { + SDValue A, D, X, Y, M; + auto matchAndXor = [&A, &D, &X, &Y, &M](SDValue And, unsigned XorIdx, + SDValue Other) { if (And.getOpcode() != ISD::AND || !And.hasOneUse()) return false; - if (And.getOperand(XorIdx).getOpcode() != ISD::XOR || - !And.getOperand(XorIdx).hasOneUse()) + SDValue Xor = And.getOperand(XorIdx); + if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) + return false; + SDValue Xor0 = Xor.getOperand(0); + SDValue Xor1 = Xor.getOperand(1); + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(Xor1)) return false; - SDValue Xor0 = And.getOperand(XorIdx).getOperand(0); - SDValue Xor1 = And.getOperand(XorIdx).getOperand(1); if (Other == Xor0) std::swap(Xor0, Xor1); if (Other != Xor1) return false; + A = And; + D = Xor; X = Xor0; Y = Xor1; M = And.getOperand(XorIdx ? 0 : 1); return true; }; - SDValue A = N->getOperand(0); - SDValue B = N->getOperand(1); - if (!matchAndXor(A, 0, B) && !matchAndXor(A, 1, B) && !matchAndXor(B, 0, A) && - !matchAndXor(B, 1, A)) + SDValue A_ = N->getOperand(0); + SDValue B_ = N->getOperand(1); + if (!matchAndXor(A_, 0, B_) && !matchAndXor(A_, 1, B_) && + !matchAndXor(B_, 0, A_) && !matchAndXor(B_, 1, A_)) return SDValue(); // Don't do anything if the mask is constant. 
This should not be reachable. @@ -5419,9 +5429,17 @@ return SDValue(); SDLoc DL(N); + SDValue NotM = DAG.getNOT(DL, M, VT); + + // If Y is a constant, check that 'andn' works with immediates. + if (!TLI.hasAndNot(Y)) { + assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); + // If not, de-canonicalize (Invert) the mask, swap the value in B part. + SDValue NewA = DAG.getNode(ISD::AND, DL, VT, D, NotM); + return DAG.getNode(ISD::OR, DL, VT, NewA, X); + } SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); - SDValue NotM = DAG.getNOT(DL, M, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -834,6 +834,8 @@ bool hasAndNotCompare(SDValue Y) const override; + bool hasAndNot(SDValue Y) const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4743,6 +4743,9 @@ } bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { + // A mask and compare against constant is ok for an 'andn' too + // even though the BMI instruction doesn't have an immediate form. + if (!Subtarget.hasBMI()) return false; @@ -4754,6 +4757,14 @@ return true; } +bool X86TargetLowering::hasAndNot(SDValue Y) const { + // x86 can't form 'andn' with an immediate. 
+ if (isa<ConstantSDNode>(Y)) + return false; + + return hasAndNotCompare(Y); +} + MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { MVT VT = MVT::getIntegerVT(NumBits); if (isTypeLegal(VT)) Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -347,8 +347,8 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, w2 -; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: bic w8, w2, w0 +; CHECK-NEXT: mvn w0, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %mask @@ -370,8 +370,9 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w0, w2 -; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: mvn w0, w8 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x @@ -449,8 +450,8 @@ define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w1, w2 -; CHECK-NEXT: orr w0, w2, w8 +; CHECK-NEXT: bic w8, w2, w1 +; CHECK-NEXT: eor w0, w8, w1 ; CHECK-NEXT: ret %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask @@ -472,8 +473,9 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_mone_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: mvn w8, w1 +; CHECK-NEXT: bic w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w1 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 -1, %y ; %x Index: test/CodeGen/X86/icmp-opt.ll =================================================================== --- 
test/CodeGen/X86/icmp-opt.ll +++ test/CodeGen/X86/icmp-opt.ll @@ -17,9 +17,9 @@ ; ; CHECK-BMI-LABEL: t1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: shrq $63, %rdi -; CHECK-BMI-NEXT: xorl $1, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %eax, %eax +; CHECK-BMI-NEXT: testq %rdi, %rdi +; CHECK-BMI-NEXT: setns %al ; CHECK-BMI-NEXT: retq %cmp = icmp sgt i64 %a, -1 %conv = zext i1 %cmp to i32 Index: test/CodeGen/X86/selectcc-to-shiftand.ll =================================================================== --- test/CodeGen/X86/selectcc-to-shiftand.ll +++ test/CodeGen/X86/selectcc-to-shiftand.ll @@ -101,10 +101,10 @@ ; ; CHECK-BMI-LABEL: pos_sel_constants: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: sarl $31, %edi -; CHECK-BMI-NEXT: notl %edi -; CHECK-BMI-NEXT: andl $5, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %eax, %eax +; CHECK-BMI-NEXT: testl %edi, %edi +; CHECK-BMI-NEXT: setns %al +; CHECK-BMI-NEXT: leal (%rax,%rax,4), %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 5, i32 0 @@ -124,10 +124,10 @@ ; ; CHECK-BMI-LABEL: pos_sel_special_constant: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: shrl $22, %edi -; CHECK-BMI-NEXT: notl %edi -; CHECK-BMI-NEXT: andl $512, %edi # imm = 0x200 -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl %eax, %eax +; CHECK-BMI-NEXT: testl %edi, %edi +; CHECK-BMI-NEXT: setns %al +; CHECK-BMI-NEXT: shll $9, %eax ; CHECK-BMI-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 512, i32 0 Index: test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll @@ -570,10 +570,8 @@ ; ; CHECK-BMI-LABEL: in_constant_varx_mone: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: orl %edi, %edx -; CHECK-BMI-NEXT: 
movl %edx, %eax +; CHECK-BMI-NEXT: andnl %edx, %edi, %eax +; CHECK-BMI-NEXT: notl %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %mask @@ -612,8 +610,9 @@ ; ; CHECK-BMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edi, %edx, %eax -; CHECK-BMI-NEXT: orl %edx, %eax +; CHECK-BMI-NEXT: notl %edx +; CHECK-BMI-NEXT: andnl %edx, %edi, %eax +; CHECK-BMI-NEXT: notl %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x @@ -656,11 +655,10 @@ ; ; CHECK-BMI-LABEL: in_constant_varx_42: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: andl $42, %edx -; CHECK-BMI-NEXT: orl %edi, %edx -; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl $42, %eax +; CHECK-BMI-NEXT: andnl %eax, %edx, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, 42 ; %x %n1 = and i32 %n0, %mask @@ -702,9 +700,10 @@ ; ; CHECK-BMI-LABEL: in_constant_varx_42_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edi, %edx, %eax -; CHECK-BMI-NEXT: andl $42, %edx -; CHECK-BMI-NEXT: orl %edx, %eax +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: xorl $42, %eax +; CHECK-BMI-NEXT: andl %edx, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, 42 ; %x @@ -743,8 +742,8 @@ ; ; CHECK-BMI-LABEL: in_constant_mone_vary: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %esi, %edx, %eax -; CHECK-BMI-NEXT: orl %edx, %eax +; CHECK-BMI-NEXT: andnl %edx, %esi, %eax +; CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask @@ -785,10 +784,9 @@ ; ; CHECK-BMI-LABEL: in_constant_mone_vary_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %esi ; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: orl %esi, %edx -; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: andnl %edx, %esi, %eax +; 
CHECK-BMI-NEXT: xorl %esi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 -1, %y ; %x