diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46577,6 +46577,59 @@
   return Ret;
 }
 
+static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
+                                   SDValue And1_L, SDValue And1_R,
+                                   const SDLoc &DL, SelectionDAG &DAG) {
+  if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
+    return SDValue();
+  SDValue NotOp = And0_L->getOperand(0);
+  if (NotOp == And1_R)
+    std::swap(And1_R, And1_L);
+  if (NotOp != And1_L)
+    return SDValue();
+
+  // (~(NotOp) & And0_R) | (NotOp & And1_R)
+  // --> ((And0_R ^ And1_R) & NotOp) ^ And0_R
+  EVT VT = And1_L->getValueType(0);
+  SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
+  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
+  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
+  return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation. This function is intended to be called from a
+/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
+  // Note that masked-merge variants using XOR or ADD expressions are
+  // normalized to OR by InstCombine, so we only check for OR here.
+  assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+  SDValue N0 = Node->getOperand(0);
+  if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+    return SDValue();
+  SDValue N1 = Node->getOperand(1);
+  if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+    return SDValue();
+
+  SDLoc DL(Node);
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  SDValue N10 = N1->getOperand(0);
+  SDValue N11 = N1->getOperand(1);
+  if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+    return Result;
+  if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+    return Result;
+  return SDValue();
+}
+
 static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
@@ -46670,6 +46723,11 @@
     return Res;
   }
 
+  // We should fold "masked merge" patterns when `andn` is not available.
+  if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
+    if (SDValue R = foldMaskedMerge(N, DAG))
+      return R;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/fold-masked-merge.ll b/llvm/test/CodeGen/X86/fold-masked-merge.ll
--- a/llvm/test/CodeGen/X86/fold-masked-merge.ll
+++ b/llvm/test/CodeGen/X86/fold-masked-merge.ll
@@ -8,11 +8,10 @@
 define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
 ; NOBMI-LABEL: masked_merge0:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movl %edi, %eax
-; NOBMI-NEXT:    andl %edi, %esi
-; NOBMI-NEXT:    notl %eax
-; NOBMI-NEXT:    andl %edx, %eax
-; NOBMI-NEXT:    orl %esi, %eax
+; NOBMI-NEXT:    movl %esi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    xorl %edx, %eax
 ; NOBMI-NEXT:    retq
 ;
 ; BMI-LABEL: masked_merge0:
@@ -54,14 +53,22 @@
 }
 
 define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
-; CHECK-LABEL: masked_merge2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    notb %al
-; CHECK-NEXT:    andb %sil, %al
-; CHECK-NEXT:    andb %dil, %sil
-; CHECK-NEXT:    orb %sil, %al
-; CHECK-NEXT:    retq
+; NOBMI-LABEL: masked_merge2:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    movl %esi, %eax
+; NOBMI-NEXT:    xorb %sil, %al
+; NOBMI-NEXT:    andb %dil, %al
+; NOBMI-NEXT:    xorb %sil, %al
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: masked_merge2:
+; BMI:       # %bb.0:
+; BMI-NEXT:    movl %edi, %eax
+; BMI-NEXT:    notb %al
+; BMI-NEXT:    andb %sil, %al
+; BMI-NEXT:    andb %dil, %sil
+; BMI-NEXT:    orb %sil, %al
+; BMI-NEXT:    retq
   %not = xor i8 %a0, -1
   %and0 = and i8 %not, %a1
   %and1 = and i8 %a1, %a0
@@ -72,13 +79,12 @@
 define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
 ; NOBMI-LABEL: masked_merge3:
 ; NOBMI:       # %bb.0:
-; NOBMI-NEXT:    movq %rdi, %rax
-; NOBMI-NEXT:    notq %rsi
+; NOBMI-NEXT:    movq %rsi, %rax
 ; NOBMI-NEXT:    notq %rdx
-; NOBMI-NEXT:    andq %rdi, %rsi
+; NOBMI-NEXT:    xorq %rdx, %rax
 ; NOBMI-NEXT:    notq %rax
-; NOBMI-NEXT:    andq %rdx, %rax
-; NOBMI-NEXT:    orq %rsi, %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    xorq %rdx, %rax
 ; NOBMI-NEXT:    retq
 ;
 ; BMI-LABEL: masked_merge3:
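Note on the transform (not part of the patch): the combine relies on the Boolean
identity (m & x) | (~m & y) == ((x ^ y) & m) ^ y, and foldMaskedMergeImpl freezes
And0_R because the replacement uses that operand twice; both uses must observe
the same value if it is poison or undef. The program below is a minimal
standalone sanity check of the identity, an illustrative sketch only (file name
and harness are hypothetical, no LLVM dependency):

    // masked-merge-identity.cpp: exhaustively verify, over all 8-bit values,
    // that (m & x) | (~m & y) == ((x ^ y) & m) ^ y, the identity used above.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned m = 0; m < 256; ++m)
        for (unsigned x = 0; x < 256; ++x)
          for (unsigned y = 0; y < 256; ++y) {
            uint8_t Merge = (m & x) | (~m & y); // masked-merge form
            uint8_t Folded = ((x ^ y) & m) ^ y; // and-not-free replacement
            assert(Merge == Folded);
          }
      return 0;
    }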