diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2365,6 +2365,18 @@
         return B;
       }
     }
+
+    // (A & C1) | (A & C2) --> A, when the two masks do not overlap and
+    // together cover every bit of A.
+    // TODO(MML): Handle the corresponding 'and' pattern in InstCombine, where
+    // new instructions may need to be generated.
+    if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
+        match(Op1, m_And(m_Specific(A), m_APInt(C2))) &&
+        (*C1 & *C2).isZero()) {
+      unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+      if ((*C1 | *C2) == APInt::getAllOnes(BitWidth))
+        return A;
+    }
   }
 
   // If the operation is with the result of a phi instruction, check whether
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -371,6 +371,26 @@
   Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI,
                                                  bool IsAnd);
 
+  // Suppose an instruction with `I`'s opcode and operands `V0` and `V1` were
+  // appended to `BB`. If that instruction could be simplified away, return the
+  // simplified value; otherwise return nullptr.
+  //
+  // Note that any simplification or combination enabled by the new instruction
+  // is not performed by the time this function returns; the assumption is that
+  // future InstCombine iterations will perform it. If that assumption does not
+  // hold and the newly created instruction cannot be simplified later,
+  // appending it may be counterproductive.
+  //
+  // The caller must guarantee that it is correct to append the newly created
+  // instruction at the end of `BB` (before its terminator); for instance, see
+  // how `foldBinaryOpIntoPHI` folds a binary op whose operands are two PHI
+  // nodes and calls this helper.
+  Value *couldSimplifyInBasicBlock(BinaryOperator &I, Value *V0, Value *V1,
+                                   BasicBlock *BB);
+
+  Instruction *foldBinaryOpIntoPHI(BinaryOperator &I, PHINode *PN0,
+                                   PHINode *PN1);
+
 public:
   /// Inserts an instruction \p New before instruction \p Old
   ///
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1289,17 +1289,117 @@
   return replaceInstUsesWith(I, NewPN);
 }
 
+Value *InstCombinerImpl::couldSimplifyInBasicBlock(BinaryOperator &I, Value *V0,
+                                                   Value *V1, BasicBlock *BB) {
+  // Only 'or' is handled for now; return nullptr for other opcodes.
+  if (I.getOpcode() != Instruction::Or)
+    return nullptr;
+  if (Value *V = SimplifyOrInst(V0, V1, SQ))
+    return V;
+
+  // FIXME: This function could be enhanced to cover more cases that could be
+  // simplified.
+  return nullptr;
+}
+
+Instruction *InstCombinerImpl::foldBinaryOpIntoPHI(BinaryOperator &I,
+                                                   PHINode *PN0, PHINode *PN1) {
+  const unsigned NumPHIValues = PN0->getNumIncomingValues();
+  // FIXME: Generalize the function when PHI nodes have more than 2 incoming
+  // blocks.
+  if ((NumPHIValues != 2) || (NumPHIValues != PN1->getNumIncomingValues())) {
+    return nullptr;
+  }
+
+  BasicBlock *PN0_BB0 = PN0->getIncomingBlock(0);
+  BasicBlock *PN0_BB1 = PN0->getIncomingBlock(1);
+  BasicBlock *PN1_BB0 = PN1->getIncomingBlock(0);
+  BasicBlock *PN1_BB1 = PN1->getIncomingBlock(1);
+  Value *PN0_VAL0 = PN0->getIncomingValue(0);
+  Value *PN0_VAL1 = PN0->getIncomingValue(1);
+
+  if ((PN0_BB0 == PN1_BB1) && (PN0_BB1 == PN1_BB0)) {
+    std::swap(PN0_BB0, PN0_BB1);
+    std::swap(PN0_VAL0, PN0_VAL1);
+  } else if ((PN0_BB0 != PN1_BB0) || (PN0_BB1 != PN1_BB1)) {
+    return nullptr;
+  }
+
+  Value *PN1_VAL0 = PN1->getIncomingValue(0);
+  Value *PN1_VAL1 = PN1->getIncomingValue(1);
+
+  // Now both PHI nodes have two incoming blocks, and we know
+  // PN0_BB0 == PN1_BB0 and PN0_BB1 == PN1_BB1.
+
+  // For simplicity, require both PHI nodes to have exactly one use.
+  if (!(PN0->hasOneUse() && PN1->hasOneUse())) {
+    return nullptr;
+  }
+
+  if (couldSimplifyInBasicBlock(I, PN0_VAL0, PN1_VAL0, PN0_BB0) &&
+      couldSimplifyInBasicBlock(I, PN0_VAL1, PN1_VAL1, PN0_BB1)) {
+    // If folding the two PHI operands into their predecessor blocks opens up
+    // an opportunity for simplification or combination, then transform
+    //
+    // bb2:
+    //   res0 = phi [val00, bb0], [val01, bb1]
+    //   res1 = phi [val10, bb0], [val11, bb1]
+    //   binaryOp res0, res1
+    //
+    // into
+    //
+    // bb0:
+    //   res_bb0 = binaryOp val00, val10
+    // bb1:
+    //   res_bb1 = binaryOp val01, val11
+    // bb2:
+    //   phi [res_bb0, bb0], [res_bb1, bb1].
+
+    // Step 1: Create the new PHI node.
+    PHINode *NewPN = PHINode::Create(I.getType(), PN0->getNumIncomingValues());
+    InsertNewInstBefore(NewPN, *PN0);
+    NewPN->takeName(PN0);
+
+    // Step 2: Create the binary ops and fold them into the predecessor blocks.
+    // Step 2.1: Do the step for bb0.
+    Builder.SetInsertPoint(PN0_BB0->getTerminator());
+    Value *ValFromBB0 = Builder.CreateBinOp(I.getOpcode(), PN0_VAL0, PN1_VAL0);
+    NewPN->addIncoming(ValFromBB0, PN0_BB0);
+
+    // Step 2.2: Do the same for bb1.
+    Builder.SetInsertPoint(PN0_BB1->getTerminator());
+    Value *ValFromBB1 = Builder.CreateBinOp(I.getOpcode(), PN0_VAL1, PN1_VAL1);
+    NewPN->addIncoming(ValFromBB1, PN0_BB1);
+
+    return replaceInstUsesWith(I, NewPN);
+  }
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
-  if (!isa<Constant>(I.getOperand(1)))
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  if (isa<Constant>(RHS)) {
+    if (auto *Sel = dyn_cast<SelectInst>(LHS)) {
+      if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
+        return NewSel;
+    } else if (auto *PN = dyn_cast<PHINode>(LHS)) {
+      if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
+        return NewPhi;
+    }
     return nullptr;
+  }
 
+  {
+    auto *PN0 = dyn_cast<PHINode>(LHS);
+    auto *PN1 = dyn_cast<PHINode>(RHS);
-  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
-    if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
-      return NewSel;
-  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
-    if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
-      return NewPhi;
+    if (PN0 && PN1) {
+      if (Instruction *NewInst = foldBinaryOpIntoPHI(I, PN0, PN1)) {
+        return NewInst;
+      }
+    }
   }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll b/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-of-binary-op.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Meant to collect test cases for folding PHIs of binary ops in general.
+; Currently it only has a test case for `or`.
+
+; Tests that the two PHI operands of the `or` (in the `return` basic block) are
+; folded into the predecessors and the IR gets simplified.
+define dso_local i64 @_Z5ParsebPF6RetValvE(i1 zeroext %test, i64 ()* nocapture %p) local_unnamed_addr {
+; CHECK-LABEL: @_Z5ParsebPF6RetValvE(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[TEST:%.*]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CALL:%.*]] = call i64 [[P:%.*]]()
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_SROA_3_0:%.*]] = phi i64 [ [[CALL]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL_SROA_3_0]]
+;
+entry:
+  br i1 %test, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call = call i64 %p()
+  %retval.sroa.3.0.extract.shift = and i64 %call, -4294967296
+  %phi.cast = and i64 %call, 4294967295
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  %retval.sroa.0.0 = phi i64 [ %phi.cast, %if.end ], [ 0, %entry ]
+  %retval.sroa.3.0 = phi i64 [ %retval.sroa.3.0.extract.shift, %if.end ], [ 0, %entry ]
+  %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.3.0, %retval.sroa.0.0
+  ret i64 %retval.sroa.0.0.insert.insert
+}
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -46,11 +46,7 @@
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2
@@ -76,11 +72,7 @@
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2
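
Note (illustrative only, not part of the patch): the new code in SimplifyOrInst targets the
pattern (A & C1) | (A & C2), which the patch folds to A when C1 & C2 == 0 and C1 | C2 covers
every bit. A minimal IR sketch of that shape, using the hypothetical function name
@masked_or_roundtrip, is:

define i64 @masked_or_roundtrip(i64 %x) {
  ; %lo keeps the low 32 bits and %hi keeps the high 32 bits; the masks are
  ; disjoint and together cover all 64 bits, so the 'or' folds back to %x.
  %lo = and i64 %x, 4294967295
  %hi = and i64 %x, -4294967296
  %r = or i64 %hi, %lo
  ret i64 %r
}

This is the same shape the fold-phi-of-binary-op.ll test reaches once the two PHI operands are
folded into the if.end predecessor: or(and %call, -4294967296, and %call, 4294967295) simplifies
to %call, while the entry-block pair simplifies to 0 via the existing or-of-zeros fold.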