Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -414,6 +414,7 @@
                                    SDValue N2, SDValue N3, ISD::CondCode CC);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
+    SDValue unfoldMaskedMerge(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, bool foldBooleans);
     SDValue rebuildSetCC(SDValue N);
@@ -5361,6 +5362,95 @@
   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
 }
 
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+//   |        A  |  |B|
+//   ((x ^ y) & m) ^ y
+//    |  D  |
+// Into:
+//   (x & m) | (y & ~m)
+// Every operand order of the two xors and of the and is matched.
+// Returns an empty SDValue if the pattern does not match, or if it matches
+// but the transform is not performed (see the checks below).
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+  assert(N->getOpcode() == ISD::XOR);
+
+  EVT VT = N->getValueType(0);
+
+  // FIXME: vector types are not handled yet.
+  if (VT.isVector())
+    return SDValue();
+
+  // Match D = (X ^ Y) for a given Y, in either operand order.
+  // Returns X on success.
+  auto matchD = [](SDValue D, SDValue Y) -> llvm::Optional<SDValue> /*X*/ {
+    if (D.getOpcode() != ISD::XOR)
+      return llvm::None;
+    SDValue D0 = D.getOperand(0);
+    SDValue D1 = D.getOperand(1);
+    if (D1 == Y)
+      return D0;
+    if (D0 == Y)
+      return D1;
+    return llvm::None;
+  };
+
+  SDValue A, D, X, Y, M;
+
+  // Match A = (D & M) with D = (X ^ B), in either operand order.
+  // On success, records D, X, Y (= B) and M.
+  auto matchA = [matchD, &D, &X, &Y, &M](SDValue A, SDValue B) -> bool {
+    if (A.getOpcode() != ISD::AND)
+      return false;
+    SDValue A0 = A.getOperand(0);
+    SDValue A1 = A.getOperand(1);
+    if (auto X_ = matchD(A0, B)) {
+      X = *X_;
+      D = A0;
+      M = A1;
+      Y = B;
+      return true;
+    }
+    if (auto X_ = matchD(A1, B)) {
+      X = *X_;
+      D = A1;
+      M = A0;
+      Y = B;
+      return true;
+    }
+    return false;
+  };
+
+  // The outer xor is commutative too, so try both operands as 'A'.
+  if (matchA(N->getOperand(0), N->getOperand(1)))
+    A = N->getOperand(0);
+  else if (matchA(N->getOperand(1), N->getOperand(0)))
+    A = N->getOperand(1);
+  else
+    return SDValue();
+
+  assert(A.getOpcode() == ISD::AND);
+  assert(D.getOpcode() == ISD::XOR);
+
+  // The 'A' and 'D' parts will be replaced completely.
+  // Don't proceed if they can't be dropped.
+  if (!(A.hasOneUse() && D.hasOneUse()))
+    return SDValue();
+
+  // We can transform if the target has AndNot, or the mask is a constant.
+  // FIXME: is it never unprofitable to unfold it for constants?
+  if (!(TLI.hasAndNot(M) || isa<ConstantSDNode>(M.getNode())))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+  SDValue NotM = DAG.getNOT(DL, M, VT);
+  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
 SDValue DAGCombiner::visitXOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -5516,6 +5606,10 @@
   if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
     return Tmp;
 
+  // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+  if (SDValue MM = unfoldMaskedMerge(N))
+    return MM;
+
   // Simplify the expression using non-local knowledge.
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar.ll
@@ -128,9 +128,9 @@
 define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 ; CHECK-LABEL: in8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: and w8, w0, w2
+; CHECK-NEXT: bic w9, w1, w2
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, %mask
@@ -141,9 +141,9 @@
 define i8 @in8_constmask(i8 %x, i8 %y) {
 ; CHECK-LABEL: in8_constmask:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, #0x3c
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: lsr w8, w0, #2
+; CHECK-NEXT: bfi w1, w8, #2, #4
+; CHECK-NEXT: mov w0, w1
 ; CHECK-NEXT: ret
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, 60
@@ -156,9 +156,9 @@
 define i16 @in16(i16 %x, i16 %y,
i16 %mask) { ; CHECK-LABEL: in16: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, w2 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %n0 = xor i16 %x, %y %n1 = and i16 %n0, %mask @@ -169,9 +169,9 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-LABEL: in16_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, #0xff0 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: lsr w8, w0, #4 +; CHECK-NEXT: bfi w1, w8, #4, #8 +; CHECK-NEXT: mov w0, w1 ; CHECK-NEXT: ret %n0 = xor i16 %x, %y %n1 = and i16 %n0, 4080 @@ -184,9 +184,9 @@ define i32 @in32(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in32: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, w2 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -197,9 +197,9 @@ define i32 @in32_constmask(i32 %x, i32 %y) { ; CHECK-LABEL: in32_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, #0xffff00 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: lsr w8, w0, #8 +; CHECK-NEXT: bfi w1, w8, #8, #16 +; CHECK-NEXT: mov w0, w1 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -212,9 +212,9 @@ define i64 @in64(i64 %x, i64 %y, i64 %mask) { ; CHECK-LABEL: in64: ; CHECK: // %bb.0: -; CHECK-NEXT: eor x8, x0, x1 -; CHECK-NEXT: and x8, x8, x2 -; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: bic x8, x1, x2 +; CHECK-NEXT: and x9, x0, x2 +; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret %n0 = xor i64 %x, %y %n1 = and i64 %n0, %mask @@ -225,9 +225,9 @@ define i64 @in64_constmask(i64 %x, i64 %y) { ; CHECK-LABEL: in64_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor x8, x0, x1 -; CHECK-NEXT: and x8, x8, #0xffffffff0000 -; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: lsr x8, x0, #16 +; CHECK-NEXT: bfi 
x1, x8, #16, #32 +; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: ret %n0 = xor i64 %x, %y %n1 = and i64 %n0, 281474976645120 @@ -242,9 +242,9 @@ define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_0_0_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w2, w8 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -255,9 +255,9 @@ define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_0_1_0: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, w2 -; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -268,9 +268,9 @@ define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_0_1_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w2, w8 -; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: bic w8, w1, w2 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -281,9 +281,9 @@ define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_0_0: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, w2 -; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -294,9 +294,9 @@ define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_0_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w2, w8 -; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, 
w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -307,9 +307,9 @@ define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_1_0: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, w2 -; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -320,9 +320,9 @@ define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_1_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w2, w8 -; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -337,9 +337,9 @@ define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { ; CHECK-LABEL: in_constmask_commutativity_0_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, #0xffff00 -; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: lsr w8, w0, #8 +; CHECK-NEXT: bfi w1, w8, #8, #16 +; CHECK-NEXT: mov w0, w1 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -350,9 +350,8 @@ define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { ; CHECK-LABEL: in_constmask_commutativity_1_0: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, #0xffff00 -; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: lsr w8, w1, #8 +; CHECK-NEXT: bfi w0, w8, #8, #16 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -363,9 +362,8 @@ define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { ; CHECK-LABEL: in_constmask_commutativity_1_1: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: and w8, w8, #0xffff00 -; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: lsr w8, w1, #8 +; CHECK-NEXT: bfi w0, w8, #8, #16 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 
%n0, 16776960 @@ -382,9 +380,9 @@ ; CHECK-LABEL: in_complex_y0: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: and w9, w9, w3 -; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: and w9, w0, w3 +; CHECK-NEXT: bic w8, w8, w3 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -397,9 +395,9 @@ ; CHECK-LABEL: in_complex_y1: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: and w9, w9, w3 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: and w9, w0, w3 +; CHECK-NEXT: bic w8, w8, w3 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -412,9 +410,9 @@ ; CHECK-LABEL: in_complex_y0_constmask: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: and w9, w9, #0xffff00 -; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: lsr w9, w0, #8 +; CHECK-NEXT: bfi w8, w9, #8, #16 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -427,9 +425,9 @@ ; CHECK-LABEL: in_complex_y1_constmask: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: and w9, w9, #0xffff00 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: lsr w9, w0, #8 +; CHECK-NEXT: bfi w8, w9, #8, #16 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y Index: test/CodeGen/X86/unfold-masked-merge-scalar.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar.ll @@ -220,10 +220,10 @@ ; ; CHECK-BMI-LABEL: in8: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax +; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; 
CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, %mask @@ -234,18 +234,18 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: in8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andb $-61, %sil ; CHECK-NOBMI-NEXT: andb $60, %dil -; CHECK-NOBMI-NEXT: xorb %dil, %sil -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: orb %sil, %dil +; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andb $-61, %sil ; CHECK-BMI-NEXT: andb $60, %dil -; CHECK-BMI-NEXT: xorb %dil, %sil -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orb %sil, %dil +; CHECK-BMI-NEXT: movl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i8 %x, %y %n1 = and i8 %n0, 60 @@ -266,10 +266,10 @@ ; ; CHECK-BMI-LABEL: in16: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i16 %x, %y %n1 = and i16 %n0, %mask @@ -280,18 +280,22 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-NOBMI-LABEL: in16_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $-4081, %esi # imm = 0xF00F ; CHECK-NOBMI-NEXT: andl $4080, %edi # imm = 0xFF0 -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: leal (%rdi,%rsi), %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in16_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; 
CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $-4081, %esi # imm = 0xF00F ; CHECK-BMI-NEXT: andl $4080, %edi # imm = 0xFF0 -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: leal (%rdi,%rsi), %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BMI-NEXT: retq %n0 = xor i16 %x, %y %n1 = and i16 %n0, 4080 @@ -312,10 +316,9 @@ ; ; CHECK-BMI-LABEL: in32: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -326,18 +329,20 @@ define i32 @in32_constmask(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in32_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in32_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -358,10 +363,9 @@ ; ; CHECK-BMI-LABEL: in64: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorq %rsi, %rdi +; CHECK-BMI-NEXT: andnq %rsi, %rdx, %rax ; CHECK-BMI-NEXT: andq %rdx, %rdi -; 
CHECK-BMI-NEXT: xorq %rsi, %rdi -; CHECK-BMI-NEXT: movq %rdi, %rax +; CHECK-BMI-NEXT: orq %rdi, %rax ; CHECK-BMI-NEXT: retq %n0 = xor i64 %x, %y %n1 = and i64 %n0, %mask @@ -372,18 +376,20 @@ define i64 @in64_constmask(i64 %x, i64 %y) { ; CHECK-NOBMI-LABEL: in64_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorq %rsi, %rdi +; CHECK-NOBMI-NEXT: movabsq $-281474976645121, %rcx # imm = 0xFFFF00000000FFFF +; CHECK-NOBMI-NEXT: andq %rsi, %rcx ; CHECK-NOBMI-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; CHECK-NOBMI-NEXT: andq %rdi, %rax -; CHECK-NOBMI-NEXT: xorq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in64_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorq %rsi, %rdi +; CHECK-BMI-NEXT: movabsq $-281474976645121, %rcx # imm = 0xFFFF00000000FFFF +; CHECK-BMI-NEXT: andq %rsi, %rcx ; CHECK-BMI-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; CHECK-BMI-NEXT: andq %rdi, %rax -; CHECK-BMI-NEXT: xorq %rsi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax ; CHECK-BMI-NEXT: retq %n0 = xor i64 %x, %y %n1 = and i64 %n0, 281474976645120 @@ -406,10 +412,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_0_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -428,10 +433,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_0_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -450,10 +454,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_0_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi 
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -472,10 +475,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_1_0_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andnl %edi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -494,10 +496,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_1_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andnl %edi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -516,10 +517,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_1_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andnl %edi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -538,10 +538,9 @@ ; ; CHECK-BMI-LABEL: in_commutativity_1_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andnl %edi, %edx, %eax ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -556,18 +555,20 @@ define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: # kill: def 
$esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -578,18 +579,20 @@ define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $-16776961, %edi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $-16776961, %edi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -600,18 +603,20 @@ 
define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { ; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $-16776961, %edi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %edi, %esi -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $-16776961, %edi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %edi, %esi -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 %x, %y %n1 = and i32 %n0, 16776960 @@ -636,10 +641,9 @@ ; CHECK-BMI-LABEL: in_complex_y0: ; CHECK-BMI: # %bb.0: ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andl %ecx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -661,10 +665,9 @@ ; CHECK-BMI-LABEL: in_complex_y1: ; CHECK-BMI: # %bb.0: ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi ; CHECK-BMI-NEXT: andl %ecx, %edi -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax +; CHECK-BMI-NEXT: orl %edi, %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -676,20 +679,22 @@ define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: 
in_complex_y0_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y0_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y @@ -701,20 +706,22 @@ define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { ; CHECK-NOBMI-LABEL: in_complex_y1_constmask: ; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andl %edx, %esi -; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-NOBMI-NEXT: xorl %esi, %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_complex_y1_constmask: ; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andl %edx, %esi -; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF ; CHECK-BMI-NEXT: 
andl $16776960, %edi # imm = 0xFFFF00 -; CHECK-BMI-NEXT: xorl %esi, %edi -; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: leal (%rdi,%rsi), %eax ; CHECK-BMI-NEXT: retq %y = and i32 %y_hi, %y_low %n0 = xor i32 %x, %y