diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14332,6 +14332,58 @@
 
     return true;
   }
+  case Instruction::Or: {
+    // Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) ->
+    // bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1)
+    if (Subtarget->hasNEON()) {
+      // If either of the intermediate Ands has uses other than the Or, don't
+      // fold. Both Or operands must be checked, not just the first one.
+      if (!I->getOperand(0)->hasOneUse() || !I->getOperand(1)->hasOneUse())
+        return false;
+
+      Instruction *OtherAnd, *IA, *IB;
+      Value *MaskValue;
+      // MainAnd refers to the And instruction that has 'Not' as one of its
+      // operands; OtherAnd is the And that uses MaskValue directly.
+      if (match(I, m_c_Or(m_Instruction(OtherAnd),
+                          m_c_And(m_Not(m_Value(MaskValue)),
+                                  m_Instruction(IA))))) {
+        if (match(OtherAnd,
+                  m_c_And(m_Specific(MaskValue), m_Instruction(IB)))) {
+          // The Or match is commutative, so MainAnd is whichever Or operand
+          // was not bound to OtherAnd.
+          Instruction *MainAnd = I->getOperand(0) == OtherAnd
+                                     ? cast<Instruction>(I->getOperand(1))
+                                     : cast<Instruction>(I->getOperand(0));
+
+          // Both Ands should be in same basic block as Or
+          if (I->getParent() != MainAnd->getParent() ||
+              I->getParent() != OtherAnd->getParent())
+            return false;
+
+          // Non-mask operands of both Ands should also be in same basic block
+          if (I->getParent() != IA->getParent() ||
+              I->getParent() != IB->getParent())
+            return false;
+
+          // Record the 'Not' operand of MainAnd (the operand that is not IA),
+          // but only if MainAnd is its sole user, along with both Or operands.
+          for (unsigned Idx = 0; Idx < MainAnd->getNumOperands(); Idx++) {
+            if (MainAnd->getOperand(Idx) != IA &&
+                MainAnd->getOperand(Idx)->hasOneUse()) {
+              Ops.push_back(&MainAnd->getOperandUse(Idx));
+              Ops.push_back(&I->getOperandUse(0));
+              Ops.push_back(&I->getOperandUse(1));
+
+              return true;
+            }
+          }
+        }
+      }
+    }
+
+    return false;
+  }
   case Instruction::Mul: {
     int NumZExts = 0, NumSExts = 0;
     for (auto &Op : I->operands()) {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -144,3 +144,32 @@
   %or = or <16 x i8> %and, %and1
   ret <16 x i8> %or
 }
+
+; The inverted mask (%0) is computed in the entry block while its and/or users
+; live in the loop body; a single BIT is still expected inside %do.body.
+define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32> %mask, i32 %scratch) {
+; CHECK-LABEL: test_bit_sink_operand
+; CHECK: // %do.body
+; CHECK: bit v1.16b, v0.16b, v2.16b
+
+entry:
+  %0 = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %div = sdiv i32 %scratch, 2
+  br label %do.body
+
+do.body:
+  %dst.addr.0 = phi <4 x i32> [ %dst, %entry ], [ %vecins, %do.body ]
+  %src.addr.0 = phi <4 x i32> [ %src, %entry ], [ %vecins1, %do.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+  %vbsl3.i = and <4 x i32> %src.addr.0, %mask
+  %vbsl4.i = and <4 x i32> %dst.addr.0, %0
+  %vbsl5.i = or <4 x i32> %vbsl3.i, %vbsl4.i
+  %vecins = insertelement <4 x i32> %vbsl5.i, i32 %scratch, i32 %i.0
+  %vecins1 = insertelement <4 x i32> %src.addr.0, i32 %div, i32 %i.0
+  %inc = add nuw nsw i32 %i.0, 1
+  %exitcond.not = icmp eq i32 %inc, 5
+  br i1 %exitcond.not, label %do.end, label %do.body
+
+do.end:
+  ret <4 x i32> %vecins
+}