Index: include/llvm/Transforms/Utils/PredicateInfo.h =================================================================== --- include/llvm/Transforms/Utils/PredicateInfo.h +++ include/llvm/Transforms/Utils/PredicateInfo.h @@ -53,6 +53,7 @@ #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist.h" @@ -192,8 +193,8 @@ typedef SmallVectorImpl ValueDFSStack; void convertUsesToDFSOrdered(Value *, SmallVectorImpl &); Value *materializeStack(unsigned int &, ValueDFSStack &, Value *); - bool stackIsInScope(const ValueDFSStack &, int DFSIn, int DFSOut) const; - void popStackUntilDFSScope(ValueDFSStack &, int DFSIn, int DFSOut); + bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const; + void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &); ValueInfo &getOrCreateValueInfo(Value *); const ValueInfo &getValueInfo(Value *) const; Function &F; @@ -213,6 +214,9 @@ DenseMap ValueInfoNums; // OrderedBasicBlocks used during sorting uses DenseMap> OBBMap; + // The set of edges along which we can only handle phi uses, due to critical + // edges. + DenseSet PhiUsesOnly; }; // This pass does eager building and then printing of PredicateInfo. It is used Index: lib/Transforms/Utils/PredicateInfo.cpp =================================================================== --- lib/Transforms/Utils/PredicateInfo.cpp +++ lib/Transforms/Utils/PredicateInfo.cpp @@ -66,10 +66,12 @@ int DFSIn = 0; int DFSOut = 0; unsigned int LocalNum = LN_Middle; - PredicateBase *PInfo = nullptr; // Only one of Def or Use will be set. 
Value *Def = nullptr; Use *Use = nullptr; + // Neither PInfo nor PhiOnly participates in the ordering + PredicateBase *PInfo = nullptr; + bool PhiOnly = false; }; // This compares ValueDFS structures, creating OrderedBasicBlocks where @@ -90,12 +92,43 @@ bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); + // We want to put the def that will get used for a given set of phi uses, + // before those phi uses. + // So we sort by edge, then by def. + // Note that only phi node uses and defs can come last. + if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) + return comparePHIRelated(A, B); + if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.Use) < std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.Use); return localComesBefore(A, B); } + // For a phi use, or a non-materialized def, return the edge it represents. + const std::pair + getBlockEdge(const ValueDFS &VD) const { + if (!VD.Def && VD.Use) { + auto *PHI = cast(VD.Use->getUser()); + return std::make_pair(PHI->getIncomingBlock(*VD.Use), PHI->getParent()); + } + // This is really a non-materialized def. + auto *PBranch = cast(VD.PInfo); + return std::make_pair(PBranch->BranchBB, PBranch->SplitBB); + } + + // For two phi related values, return the ordering. + bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { + auto &ABlockEdge = getBlockEdge(A); + auto &BBlockEdge = getBlockEdge(B); + if (ABlockEdge < BBlockEdge) + return true; + if (ABlockEdge > BBlockEdge) + return false; + // Now sort defs before uses + return std::tie(A.Def, A.Use) < std::tie(B.Def, B.Use); + } + // Get the definition of an instruction that occurs in the middle of a block. 
Value *getMiddleDef(const ValueDFS &VD) const { if (VD.Def) @@ -160,16 +193,37 @@ } // namespace PredicateInfoClasses -bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, int DFSIn, - int DFSOut) const { +bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, + const ValueDFS &VDUse) const { if (Stack.empty()) return false; - return DFSIn >= Stack.back().DFSIn && DFSOut <= Stack.back().DFSOut; + // If it's a phi only use, make sure it's for this phi node edge, and that the + // use is in a phi node. If it's anything else, and the top of the stack is + // phionly, we need to pop the stack. We deliberately sort phi uses next to + // the defs they must go with so that we can know it's time to pop the stack + // when we hit the end of the phi uses for a given def. + if (Stack.back().PhiOnly) { + if (!VDUse.Use) + return false; + auto *PHI = dyn_cast(VDUse.Use->getUser()); + if (!PHI) + return false; + // The only phionly defs should be branch info. + auto *PBranch = dyn_cast(Stack.back().PInfo); + assert(PBranch && "Only branches should have PHIOnly defs"); + // Check edge + BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.Use); + if (EdgePred != PBranch->BranchBB) + return false; + } + + return (VDUse.DFSIn >= Stack.back().DFSIn && + VDUse.DFSOut <= Stack.back().DFSOut); } -void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, int DFSIn, - int DFSOut) { - while (!Stack.empty() && !stackIsInScope(Stack, DFSIn, DFSOut)) +void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, + const ValueDFS &VD) { + while (!Stack.empty() && !stackIsInScope(Stack, VD)) Stack.pop_back(); } @@ -271,20 +325,11 @@ SmallVector CmpOperands; BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); - bool FirstSinglePred = FirstBB->getSinglePredecessor(); - bool SecondSinglePred = SecondBB->getSinglePredecessor(); SmallVector SuccsToProcess; bool isAnd = false; bool isOr = false; - // First make sure we have single preds for 
these successors, as we can't - // usefully propagate true/false info to them if there are multiple paths to - // them. - if (FirstSinglePred) - SuccsToProcess.push_back(FirstBB); - if (SecondSinglePred) - SuccsToProcess.push_back(SecondBB); - if (SuccsToProcess.empty()) - return; + SuccsToProcess.push_back(FirstBB); + SuccsToProcess.push_back(SecondBB); // Second, see if we have a comparison we support SmallVector ComparisonsToProcess; CmpInst::Predicate Pred; @@ -321,6 +366,8 @@ new PredicateBranch(Op, BranchBB, Succ, Cmp, TakenEdge); AllInfos.push_back(PB); OperandInfo.Infos.push_back(PB); + if (!Succ->getSinglePredecessor()) + PhiUsesOnly.insert({BranchBB, Succ}); } } CmpOperands.clear(); @@ -368,29 +415,14 @@ RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def; ValueDFS &Result = *RenameIter; auto *ValInfo = Result.PInfo; - // For branches, we can just place the operand in the split block. For - // assume, we have to place it right before the assume to ensure we dominate - // all of our uses. + // For branches, we can just place the operand in the branch block before + // the terminator. For assume, we have to place it right before the assume + // to ensure we dominate all of our uses. Always insert right before the + // relevant instruction (terminator, assume), so that we insert in proper + // order in the case of multiple predicateinfo in the same block. if (isa(ValInfo)) { auto *PBranch = cast(ValInfo); - // It's possible we are trying to insert multiple predicateinfos in the - // same block at the beginning of the block. When we do this, we need to - // insert them one after the other, not one before the other. To see if we - // have already inserted predicateinfo into this block, we see if Op != - // OrigOp && Op->getParent() == PBranch->SplitBB. Op must be an - // instruction we inserted if it's not the original op. 
- BasicBlock::iterator InsertPt; - if (Op == OrigOp || - cast(Op)->getParent() != PBranch->SplitBB) { - InsertPt = PBranch->SplitBB->begin(); - // Insert after last phi node. - while (isa(InsertPt)) - ++InsertPt; - } else { - // Insert after op. - InsertPt = ++(cast(Op)->getIterator()); - } - IRBuilder<> B(PBranch->SplitBB, InsertPt); + IRBuilder<> B(PBranch->BranchBB->getTerminator()); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); Value *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); @@ -400,12 +432,7 @@ auto *PAssume = dyn_cast(ValInfo); assert(PAssume && "Should not have gotten here without it being an assume"); - // Unlike above, this should already insert in the right order when we - // insert multiple predicateinfos in the same block. Because we are - // always inserting right before the assume (instead of the beginning of a - // block), newer insertions will end up after older ones. - IRBuilder<> B(PAssume->AssumeInst->getParent(), - PAssume->AssumeInst->getIterator()); + IRBuilder<> B(PAssume->AssumeInst); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); Value *PIC = B.CreateCall(IF, Op); @@ -447,28 +474,49 @@ // created otherwise. for (auto &PossibleCopy : ValueInfo.Infos) { ValueDFS VD; - BasicBlock *CopyBB = nullptr; // Determine where we are going to place the copy by the copy type. // The predicate info for branches always come first, they will get // materialized in the split block at the top of the block. // The predicate info for assumes will be somewhere in the middle, // it will get materialized in front of the assume. 
- if (const auto *PBranch = dyn_cast(PossibleCopy)) { - CopyBB = PBranch->SplitBB; - VD.LocalNum = LN_First; - } else if (const auto *PAssume = - dyn_cast(PossibleCopy)) { - CopyBB = PAssume->AssumeInst->getParent(); + if (const auto *PAssume = dyn_cast(PossibleCopy)) { VD.LocalNum = LN_Middle; - } else - llvm_unreachable("Unhandled predicate info type"); - DomTreeNode *DomNode = DT.getNode(CopyBB); - if (!DomNode) - continue; - VD.DFSIn = DomNode->getDFSNumIn(); - VD.DFSOut = DomNode->getDFSNumOut(); - VD.PInfo = PossibleCopy; - OrderedUses.push_back(VD); + DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent()); + if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + OrderedUses.push_back(VD); + } else if (const auto *PBranch = + dyn_cast(PossibleCopy)) { + // If we can only do phi uses, we treat it like it's in the branch + // block, and handle it specially. We know that it goes last, and only + // dominates phi uses. + if (PhiUsesOnly.count({PBranch->BranchBB, PBranch->SplitBB})) { + VD.LocalNum = LN_Last; + auto *DomNode = DT.getNode(PBranch->BranchBB); + if (DomNode) { + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + VD.PhiOnly = true; + OrderedUses.push_back(VD); + } + } else { + // Otherwise, we are in the split block (even though we perform + // insertion in the branch block). + // Insert a possible copy at the split block and before the branch. 
+ VD.LocalNum = LN_First; + auto *DomNode = DT.getNode(PBranch->SplitBB); + if (DomNode) { + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + OrderedUses.push_back(VD); + } + } + } } convertUsesToDFSOrdered(Op, OrderedUses); @@ -492,10 +540,10 @@ << VD.DFSOut << ")\n"); bool ShouldPush = (VD.Def || PossibleCopy); - bool OutOfScope = !stackIsInScope(RenameStack, VD.DFSIn, VD.DFSOut); + bool OutOfScope = !stackIsInScope(RenameStack, VD); if (OutOfScope || ShouldPush) { // Sync to our current scope. - popStackUntilDFSScope(RenameStack, VD.DFSIn, VD.DFSOut); + popStackUntilDFSScope(RenameStack, VD); ShouldPush |= (VD.Def || PossibleCopy); if (ShouldPush) { RenameStack.push_back(VD); Index: test/Transforms/Util/PredicateInfo/diamond.ll =================================================================== --- /dev/null +++ test/Transforms/Util/PredicateInfo/diamond.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s +define i1 @f(i32 %x, i1 %y) { +; CHECK-LABEL: @f( +; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2 +; CHECK: [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]]) +; CHECK-NEXT: br i1 [[CMP2]], label [[BB2]], label [[BB3]] +; CHECK: bb2: +; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i1 false +; + br i1 %y, label %bb0, label %bb1 + bb0: + %cmp = icmp sge i32 %x, 0 ; x >= 0 + br i1 %cmp, label %bb2, label %bb3 + bb1: + %x2 = add nsw nuw i32 %x, 1 + %cmp2 = icmp sge i32 %x2, 2 ; 
x+1 >= 2 / x >= 1 + br i1 %cmp2, label %bb2, label %bb3 + bb2: + %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ] + br label %bb3 + bb3: + ret i1 0 +} + +define i1 @g(i32 %x, i1 %y) { +; CHECK-LABEL: @g( +; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[X2:%.*]] = add nuw nsw i32 [[X]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X2]], 2 +; CHECK: [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]]) +; CHECK-NEXT: br i1 [[CMP2]], label [[BB3]], label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i1 false +; + br i1 %y, label %bb0, label %bb1 + bb0: + %cmp = icmp sge i32 %x, 0 ; x >= 0 + br i1 %cmp, label %bb3, label %bb2 + bb1: + %x2 = add nsw nuw i32 %x, 1 + %cmp2 = icmp sge i32 %x2, 2 ; x+1 >= 2 / x >= 1 + br i1 %cmp2, label %bb3, label %bb2 + bb2: + %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ] + br label %bb3 + bb3: + ret i1 0 +} + Index: test/Transforms/Util/PredicateInfo/testandor.ll =================================================================== --- test/Transforms/Util/PredicateInfo/testandor.ll +++ test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s +; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s declare void @foo(i1) declare void @bar(i32)