Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -12684,6 +12684,43 @@ that the optimizer can otherwise deduce or facts that are of little use to the optimizer. +.. _int_predicateinfo: + +'``llvm.predicateinfo``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare type @llvm.predicateinfo(type %operand) returned(1) readnone + +Arguments: +"""""""""" + +The first argument is an operand to which predicate info is attached. + +Overview: +"""""""""" + +The ``llvm.predicateinfo`` intrinsic is used to attach information to +operations used in comparisons, as well as to the results of those +comparisons. It is a copy operation used to build Extended SSA form, +and so is placed at the beginning of blocks dominated by the true or +false edges of branches, as well as blocks that are post-dominated by +assume operations. + +For operations used in branch comparisons, the information attached to +the intrinsic includes which edge direction the current block is +dominated by (true or false), as well as the original comparison. For +assumes, the information attached includes a pointer to the assume +instruction. + +The PredicateInfo analysis can be used to retrieve the attached +information. The intrinsic has no code-generation effect, and always +returns the first argument from the perspective of the optimizer. + .. _type.test: '``llvm.type.test``' Intrinsic Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -781,6 +781,10 @@ [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>; +//===----- Intrinsics that are used to provide predicate information -----===// + +def int_predicateinfo : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], + [IntrNoMem, Returned<0>]>; //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -284,6 +284,8 @@ void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializePostRAHazardRecognizerPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); +void initializePredicateInfoWrapperPassPass(PassRegistry&); +void initializePredicateInfoPrinterLegacyPassPass(PassRegistry &); void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry&); void initializePrintBasicBlockPassPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); Index: include/llvm/Transforms/Utils/PredicateInfo.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/PredicateInfo.h @@ -0,0 +1,291 @@ +//===- PredicateInfo.h - Build PredicateInfo ----------------------*-C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// \brief +// +// This file implements the PredicateInfo analysis, which creates an Extended +// SSA form for operations used in branch comparisons and llvm.assume +// comparisons. Copies of these operations are inserted into the true/false +// edge (and after assumes), and information attached to the copies. All uses +// of the original operation in blocks dominated by the true/false edge (and +// assume), are replaced with uses of the copies. This enables passes to easily +// and sparsely propagate condition based info into the operations that may be +// affected. +// +// Example: +// %cmp = icmp eq i32 %x, 50 +// br i1 %cmp, label %true, label %false +// true: +// ret i32 %x +// false: +// ret i32 1 +// +// will become +// +// %cmp = icmp eq i32, %x, 50 +// br i1 %cmp, label %true, label %false +// true: +// %x.0 = call @llvm.predicateinfo.i32(i32 %x) +// ret i32 %x +// false: +// ret i32 1 +// +// Using getPredicateInfoFor on x.0 will give you the comparison it is +// dominated by (the icmp), and that you are located in the true edge of that +// comparison, which tells you x.0 is 50. +// +// In order to reduce the number of copies inserted, predicateinfo is only +// inserted where it would actually be live. This means if there are no uses of +// an operation dominated by the branch edges, or by an assume, the associated +// predicate info is never inserted. +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H +#define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include +#include +#include + +namespace llvm { + +class DominatorTree; +class Function; +class Instruction; +class MemoryAccess; +class LLVMContext; +class raw_ostream; +class OrderedBasicBlock; + +enum PredicateType { PT_Branch, PT_Assume }; + +// Base class for all predicate information we provide. +// All of our predicate information has at least a comparison. +class PredicateBase : public ilist_node { +public: + PredicateType Type; + CmpInst *Comparison; + PredicateBase(const PredicateBase &) = delete; + PredicateBase &operator=(const PredicateBase &) = delete; + PredicateBase() = delete; + static inline bool classof(const PredicateBase *) { return true; } + +protected: + PredicateBase(PredicateType PT, CmpInst *Comparison) + : Type(PT), Comparison(Comparison) {} +}; + +// Provides predicate information for assumes. Since assumes are always true, +// we simply provide the assume instruction, so you can tell your relative +// position to it. +class PredicateAssume : public PredicateBase { +public: + IntrinsicInst *AssumeInst; + PredicateAssume(IntrinsicInst *AssumeInst, CmpInst *Comparison) + : PredicateBase(PT_Assume, Comparison), AssumeInst(AssumeInst) {} + PredicateAssume() = delete; + static inline bool classof(const PredicateAssume *) { return true; } + static inline bool classof(const PredicateBase *PB) { + return PB->Type == PT_Assume; + } +}; + +// Provides predicate information for branches. +class PredicateBranch : public PredicateBase { +public: + // This is the block that is conditional upon the comparison. + BasicBlock *BranchBB; + // This is one of the true/false successors of BranchBB. + BasicBlock *SplitBB; + // If true, SplitBB is the true successor, otherwise it's the false successor. + bool TrueEdge; + PredicateBranch(BasicBlock *BranchBB, BasicBlock *SplitBB, + CmpInst *Comparison, bool TakenEdge) + : PredicateBase(PT_Branch, Comparison), BranchBB(BranchBB), + SplitBB(SplitBB), TrueEdge(TakenEdge) {} + PredicateBranch() = delete; + + static inline bool classof(const PredicateBranch *) { return true; } + static inline bool classof(const PredicateBase *PB) { + return PB->Type == PT_Branch; + } +}; + +// This name is used in a few places, so kick it into their own namespace +namespace PredicateInfoClasses { +struct ValueDFS; +} + +/// \brief Encapsulates PredicateInfo, including all data associated with memory +/// accesses. +class PredicateInfo { +public: +private: + // Used to store information about each value we might rename. + struct ValueInfo { + // Information about each possible copy. During processing, this is each + // inserted info. After processing, we move the uninserted ones to the + // uninserted vector. + SmallVector Infos; + SmallVector UninsertedInfos; + }; + // This owns the all the predicate infos in the function, placed or not. + iplist AllInfos; + +public: + PredicateInfo(Function &, DominatorTree &, AssumptionCache &); + ~PredicateInfo(); + + void verifyPredicateInfo() const; + + void dump() const; + void print(raw_ostream &) const; + + const PredicateBase *getPredicateInfoFor(const Value *V) const { + return PredicateMap.lookup(V); + } + +protected: + // Used by PredicateInfo annotater, dumpers, and wrapper pass. + friend class PredicateInfoAnnotatedWriter; + friend class PredicateInfoPrinterLegacyPass; + +private: + void buildPredicateInfo(); + void processAssume(IntrinsicInst *, BasicBlock *, SmallPtrSetImpl &); + void processBranch(BranchInst *, BasicBlock *, SmallPtrSetImpl &); + void renameUses(SmallPtrSetImpl &); + using ValueDFS = PredicateInfoClasses::ValueDFS; + typedef SmallVectorImpl ValueDFSStack; + void convertUsesToDFSOrdered(Value *, SmallVectorImpl &); + Value *materializeStack(unsigned int &, ValueDFSStack &, const ValueDFS &, + Value *); + bool stackIsInScope(const ValueDFSStack &, int DFSIn, int DFSOut) const; + void popStackUntilDFSScope(ValueDFSStack &, int DFSIn, int DFSOut); + ValueInfo &getOrCreateValueInfo(Value *); + const ValueInfo &getValueInfo(Value *) const; + Function &F; + DominatorTree &DT; + AssumptionCache &AC; + // This maps from copy operands to Predicate Info. Note that it does not own + // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos + // vector. + DenseMap PredicateMap; + // This stores info about each operand or comparison result we make copies + // of. The real ValueInfos start at index 1, index 0 is unused so that we can + // more easily detect invalid indexing. + SmallVector ValueInfos; + // This gives the index into the ValueInfos array for a given Value. Because + // 0 is not a valid Value Info index, you can use DenseMap::lookup and tell + // whether it returned a valid result. + DenseMap ValueInfoNums; + // OrderedBasicBlocks used during sorting uses + DenseMap OBBMap; +}; + +// This pass does eager building and then printing of PredicateInfo. It is used +// by +// the tests to be able to build, dump, and verify PredicateInfo. +class PredicateInfoPrinterLegacyPass : public FunctionPass { +public: + PredicateInfoPrinterLegacyPass(); + + static char ID; + bool runOnFunction(Function &) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// An analysis that produces \c PredicateInfo for a function. +/// +class PredicateInfoAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + // Wrap PredicateInfo result to ensure address stability of internal + // PredicateInfo + // pointers after construction. Use a wrapper class instead of plain + // unique_ptr to avoid build breakage on MSVC. + struct Result { + Result(std::unique_ptr &&PredInfo) + : PredInfo(std::move(PredInfo)) {} + PredicateInfo &getPredInfo() { return *PredInfo.get(); } + + std::unique_ptr PredInfo; + }; + + Result run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Printer pass for \c PredicateInfo. +class PredicateInfoPrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit PredicateInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Verifier pass for \c PredicateInfo. +struct PredicateInfoVerifierPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Legacy analysis pass which computes \c PredicateInfo. +class PredicateInfoWrapperPass : public FunctionPass { +public: + PredicateInfoWrapperPass(); + + static char ID; + bool runOnFunction(Function &) override; + void releaseMemory() override; + PredicateInfo &getPredInfo() { return *PredInfo; } + const PredicateInfo &getPredInfo() const { return *PredInfo; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void verifyAnalysis() const override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + +private: + std::unique_ptr PredInfo; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -136,6 +136,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MemorySSA.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/SimplifyInstructions.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -110,6 +110,7 @@ FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis()) +FUNCTION_ANALYSIS("predicateinfo", PredicateInfoAnalysis()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("targetir", Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -86,13 +86,13 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MemorySSA.h" +#include "llvm/Transforms/Utils/PredicateInfo.h" #include #include #include using namespace llvm; using namespace PatternMatch; using namespace llvm::GVNExpression; - #define DEBUG_TYPE "newgvn" STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); @@ -213,6 +213,7 @@ AliasAnalysis *AA; MemorySSA *MSSA; MemorySSAWalker *MSSAWalker; + PredicateInfo *PredInfo; BumpPtrAllocator ExpressionAllocator; ArrayRecycler ArgRecycler; @@ -284,7 +285,8 @@ bool runOnFunction(Function &F) override; bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA); + TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, + PredicateInfo *PredInfo); private: void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -293,7 +295,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); - + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); } @@ -352,7 +354,10 @@ const BasicBlock *); const Expression *performSymbolicAggrValueEvaluation(Instruction *, const BasicBlock *); - + const Expression *performSymbolicCmpEvaluation(Instruction *I, + const BasicBlock *B); + const Expression *performSymbolicPredicateInfoEvaluation(Instruction *, + const BasicBlock *); // Congruence finding. // Templated to allow them to work both on BB's and BB-edges. template @@ -368,7 +373,7 @@ void updateReachableEdge(BasicBlock *, BasicBlock *); void processOutgoingEdges(TerminatorInst *, BasicBlock *); bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const; - Value *findConditionEquivalence(Value *, BasicBlock *) const; + Value *findConditionEquivalence(Value *, const BasicBlock *) const; // Elimination. struct ValueDFS; @@ -440,6 +445,7 @@ INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(PredicateInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -863,13 +869,86 @@ return E; } +const Expression * +NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I, + const BasicBlock *B) { + if (auto *PI = PredInfo->getPredicateInfoFor(I)) { + DEBUG(dbgs() << "Found predicate info from instruction !\n"); + auto *CopyOf = I->getOperand(0); + auto *Cmp = PI->Comparison; + // If this is an assume predicate and a copy of the comparison, it must be + // true. + if (isa(PI) && CopyOf == Cmp) + return createConstantExpression(ConstantInt::getTrue(Cmp->getType())); + + Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0), I, B); + Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1), I, B); + // Sort the ops + CmpInst::Predicate Predicate = Cmp->getPredicate(); + // FIXME: We should really be ranking them here + if (FirstOp > SecondOp || + (isa(SecondOp) || isa(SecondOp))) { + std::swap(FirstOp, SecondOp); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + + if (isa(PI)) { + // If the comparison is true when the operands are equal, then we know the + // operands are equal, because assumes must always be true. + if (CmpInst::isTrueWhenEqual(Predicate)) + if (auto *C = dyn_cast(FirstOp)) + return createConstantExpression(C); + return createVariableExpression(FirstOp); + } else if (const auto *PBranch = dyn_cast(PI)) { + if (CopyOf == Cmp) { + if (CmpInst::isTrueWhenEqual(Predicate)) { + if (PBranch->TrueEdge) + return createConstantExpression( + ConstantInt::getTrue(Cmp->getType())); + else + return createConstantExpression( + ConstantInt::getFalse(Cmp->getType())); + } else if (CmpInst::isFalseWhenEqual(Predicate)) { + if (!PBranch->TrueEdge) + return createConstantExpression( + ConstantInt::getTrue(Cmp->getType())); + else + return createConstantExpression( + ConstantInt::getFalse(Cmp->getType())); + } + } else if ((PBranch->TrueEdge && CmpInst::isTrueWhenEqual(Predicate)) || + (!PBranch->TrueEdge && CmpInst::isFalseWhenEqual(Predicate))) { + if (auto *C = dyn_cast(FirstOp)) + return createConstantExpression(C); + return createVariableExpression(FirstOp); + } + } + } + return nullptr; +} + // Evaluate read only and pure calls, and create an expression result. const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I, const BasicBlock *B) { auto *CI = cast(I); - if (AA->doesNotAccessMemory(CI)) + if (auto *II = dyn_cast(I)) { + // Things with the returned attribute are copies of arguments + if (auto *ReturnedValue = II->getReturnedArgOperand()) { + if (II->getIntrinsicID() == Intrinsic::predicateinfo) { + const Expression *Result = performSymbolicPredicateInfoEvaluation(I, B); + if (Result) + return Result; + } + if (auto *C = dyn_cast(ReturnedValue)) + return createConstantExpression(C); + return createVariableExpression(ReturnedValue); + } + + } else if (AA->doesNotAccessMemory(CI)) { return createCallExpression(CI, nullptr, B); - if (AA->onlyReadsMemory(CI)) { + } + + else if (AA->onlyReadsMemory(CI)) { MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI); return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B); } @@ -914,6 +993,11 @@ // Evaluate PHI nodes symbolically, and create an expression result. const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, const BasicBlock *B) { + + const Expression *Result = performSymbolicPredicateInfoEvaluation(I, B); + if (Result) + return Result; + auto *E = cast(createPHIExpression(I)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. @@ -1012,6 +1096,73 @@ return createAggregateValueExpression(I, B); } +const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I, + const BasicBlock *B) { + CmpInst *CI = dyn_cast(I); + // See if our operands are equal to those of a previous predicate, and if so, + // if it implies true or false. + auto Op0 = lookupOperandLeader(CI->getOperand(0), I, B); + auto Op1 = lookupOperandLeader(CI->getOperand(1), I, B); + // Avoid processing the same info twice + const PredicateBase *LastPredInfo = nullptr; + + // See if we know something about the comparison itself, like it is the target + // of an assume. + auto *CmpPI = PredInfo->getPredicateInfoFor(I); + if (dyn_cast_or_null(CmpPI)) + return createConstantExpression(ConstantInt::getTrue(CI->getType())); + + // See if we know something just from the operands themselves + if (Op0 == Op1) { + if (CI->isTrueWhenEqual()) + return createConstantExpression(ConstantInt::getTrue(CI->getType())); + else if (CI->isFalseWhenEqual()) + return createConstantExpression(ConstantInt::getFalse(CI->getType())); + } + + // See if our operands have predicate info, so that we may be able to derive + // something from a previous comparison. + for (const auto &Op : CI->operands()) { + auto *PI = PredInfo->getPredicateInfoFor(Op); + if (const auto *PBranch = dyn_cast_or_null(PI)) { + if (PI == LastPredInfo) + continue; + LastPredInfo = PI; + // TODO: Along the false edge, we may know more things too, like icmp of + // same operands is false. + // + auto *BranchCond = PBranch->Comparison; + if (lookupOperandLeader(BranchCond->getOperand(0), I, B) == Op0 && + lookupOperandLeader(BranchCond->getOperand(1), I, B) == Op1) { + if (PBranch->TrueEdge) { + // If we know the previous predicate is true and we are in the true + // edge then we may be implied true or false. + if (CI->isImpliedTrueByMatchingCmp(BranchCond->getPredicate())) + return createConstantExpression( + ConstantInt::getTrue(CI->getType())); + if (CI->isImpliedFalseByMatchingCmp(BranchCond->getPredicate())) + return createConstantExpression( + ConstantInt::getFalse(CI->getType())); + } else { + // Just handle the ne and eq cases, where if we have the same + // operands, we may know something. + if (BranchCond->getPredicate() == CI->getPredicate()) { + // Same predicate, same ops,we know it was false, so this is false. + return createConstantExpression( + ConstantInt::getFalse(CI->getType())); + } else if (BranchCond->getPredicate() == CI->getInversePredicate()) { + // Inverse predicate, we know the other was false, so this is true. + // FIXME: Double check this + return createConstantExpression( + ConstantInt::getTrue(CI->getType())); + } + } + } + } + } + // Create expression will take care of simplifyCmpInst + return createExpression(I, B); +} // Substitute and symbolize the value before value numbering. const Expression *NewGVN::performSymbolicEvaluation(Value *V, @@ -1046,6 +1197,10 @@ case Instruction::BitCast: { E = createExpression(I, B); } break; + case Instruction::ICmp: + case Instruction::FCmp: { + E = performSymbolicCmpEvaluation(I, B); + } break; case Instruction::Add: case Instruction::FAdd: @@ -1065,8 +1220,6 @@ case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::ICmp: - case Instruction::FCmp: case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -1364,7 +1517,8 @@ // Given a predicate condition (from a switch, cmp, or whatever) and a block, // see if we know some constant value for it already. -Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const { +Value *NewGVN::findConditionEquivalence(Value *Cond, + const BasicBlock *B) const { auto Result = lookupOperandLeader(Cond, nullptr, B); if (isa(Result)) return Result; @@ -1721,13 +1875,14 @@ // This is the main transformation entry point. bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, TargetLibraryInfo *_TLI, AliasAnalysis *_AA, - MemorySSA *_MSSA) { + MemorySSA *_MSSA, PredicateInfo *_PredInfo) { bool Changed = false; DT = _DT; AC = _AC; TLI = _TLI; AA = _AA; MSSA = _MSSA; + PredInfo = _PredInfo; DL = &F.getParent()->getDataLayout(); MSSAWalker = MSSA->getWalker(); @@ -1802,6 +1957,9 @@ while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. + // TODO: As we hit a new block, we should push and pop equalities into a + // table lookupOperandLeader can use, to catch things PredicateInfo + // might miss, like edge-only equivalences. for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; InstrNum = TouchedInstructions.find_next(InstrNum)) { @@ -1890,7 +2048,8 @@ &getAnalysis().getAssumptionCache(F), &getAnalysis().getTLI(), &getAnalysis().getAAResults(), - &getAnalysis().getMSSA()); + &getAnalysis().getMSSA(), + &getAnalysis().getPredInfo()); } PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager &AM) { @@ -1904,7 +2063,8 @@ auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &MSSA = AM.getResult(F).getMSSA(); - bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA); + auto &PredInfo = AM.getResult(F).getPredInfo(); + bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA, &PredInfo); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -38,6 +38,7 @@ MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp SSAUpdater.cpp Index: lib/Transforms/Utils/PredicateInfo.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/PredicateInfo.cpp @@ -0,0 +1,702 @@ +//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the PredicateInfo class. +// +//===----------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include +#define DEBUG_TYPE "predicateinfo" +using namespace llvm; +using namespace PatternMatch; +using namespace llvm::PredicateInfoClasses; + +INITIALIZE_PASS_BEGIN(PredicateInfoWrapperPass, "predicateinfo", + "PredicateInfo", false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(PredicateInfoWrapperPass, "predicateinfo", "PredicateInfo", + false, true) + +INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(PredicateInfoWrapperPass) +INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +static cl::opt VerifyPredicateInfo( + "verify-predicateinfo", cl::init(false), cl::Hidden, + cl::desc("Verify PredicateInfo in legacy printer pass.")); +namespace llvm { +namespace PredicateInfoClasses { +enum LocalNum { + // Operations that must appear first in the block. + LN_First, + // Operations that are somewhere in the middle of the block, and are sorted on + // demand. + LN_Middle, + // Operations that must appear last in a block, like successor phi node uses. + LN_Last +}; + +// Associate global and local DFS info with defs and uses, so we can sort them +// into a global domination ordering. +struct ValueDFS { + int DFSIn = 0; + int DFSOut = 0; + unsigned int LocalNum = LN_Middle; + PredicateBase *PInfo = nullptr; + // Only one of Def or Use will be set. + Value *Def = nullptr; + Use *Use = nullptr; +}; + +// This compares ValueDFS structures, creating OrderedBasicBlocks where +// necessary to compare uses/defs in the same block. Doing so allows us to walk +// the minimum number of instructions necessary to compute our def/use ordering. +struct ValueDFS_Compare { + mutable DenseMap OBBMap; + ValueDFS_Compare(DenseMap &OBBMap) + : OBBMap(OBBMap) {} + bool operator()(const ValueDFS &A, const ValueDFS &B) const { + if (&A == &B) + return false; + // The only case we can't directly compare them is when they in the same + // block, and both have localnum == middle. In that case, we have to use + // comesbefore to see what the real ordering is, because they are in the + // same basic block. + + bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); + + if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) + return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.Use) < + std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.Use); + return localComesBefore(A, B); + } + + // This performs the necessary local basic block ordering checks to tell + // whether A comes before B, where both are in the same basic block. + bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const { + auto *ADef = A.Def; + auto *BDef = B.Def; + + // It's possible for the defs and uses to be null. For branches, the local + // numbering will say the placed predicaeinfos should go first (IE + // LN_beginning), so we won't be in this function. For assumes, we will end + // up here, beause we need to order the def we will placerelative to the + // assume. So for the purpose of this function, we pretend the def is the + // assume because that is where we will insert the info. + if (!ADef && !A.Use) { + assert(A.PInfo && + "No def, no use, and no predicateinfo should not occur"); + assert(isa(A.PInfo) && + "Middle of block should only occur for assumes"); + ADef = cast(A.PInfo)->AssumeInst; + } + if (!BDef && !B.Use) { + assert(B.PInfo && + "No def, no use, and no predicateinfo should not occur"); + assert(isa(B.PInfo) && + "Middle of block should only occur for assumes"); + BDef = cast(B.PInfo)->AssumeInst; + } + + // See if we have real values or uses. If we have real values, we are + // guaranteed they are instructions or arguments. No matter what, we are + // guaranteed they are in the same block if they are instructions. + Argument *ArgA = dyn_cast_or_null(ADef); + Argument *ArgB = dyn_cast_or_null(BDef); + + if (ArgA && !ArgB) + return true; + if (ArgB && !ArgA) + return false; + if (ArgA && ArgB) + return ArgA->getArgNo() < ArgB->getArgNo(); + + Instruction *AInst = nullptr; + Instruction *BInst = nullptr; + if (ADef) { + AInst = cast(ADef); + } else { + AInst = cast(A.Use->getUser()); + } + if (BDef) { + BInst = cast(BDef); + } else { + BInst = cast(B.Use->getUser()); + } + auto *BB = AInst->getParent(); + auto LookupResult = OBBMap.find(BB); + if (LookupResult != OBBMap.end()) + return LookupResult->second->dominates(AInst, BInst); + else { + auto *OBB = new OrderedBasicBlock(BB); + OBBMap.insert({BB, OBB}); + return OBB->dominates(AInst, BInst); + } + return std::tie(ADef, A.Use) < std::tie(BDef, B.Use); + } +}; + +} // namespace PredicateInfoClasses + +bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, int DFSIn, + int DFSOut) const { + if (Stack.empty()) + return false; + return DFSIn >= Stack.back().DFSIn && DFSOut <= Stack.back().DFSOut; +} + +void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, int DFSIn, + int DFSOut) { + while (!Stack.empty() && !stackIsInScope(Stack, DFSIn, DFSOut)) + Stack.pop_back(); +} + +// Convert the uses of Op into a vector of uses, associating global and local +// DFS info with each one. +void PredicateInfo::convertUsesToDFSOrdered( + Value *Op, SmallVectorImpl &DFSOrderedSet) { + for (auto &U : Op->uses()) { + if (auto *I = dyn_cast(U.getUser())) { + ValueDFS VD; + // Put the phi node uses in the incoming block. + BasicBlock *IBlock; +#if 0 + if (auto *PN = dyn_cast(I)) { + IBlock = PN->getIncomingBlock(U); + // Make phi node users appear last in the incoming block + // they are from. + VD.LocalNum = LN_Last; + } else +#endif + { + // If it's not a phi node use, it is somewhere in the middle of the + // block. + IBlock = I->getParent(); + VD.LocalNum = LN_Middle; + } + DomTreeNode *DomNode = DT.getNode(IBlock); + // It's possible our use is in an unreachable block. Skip it if so. + if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.Use = &U; + DFSOrderedSet.push_back(VD); + } + } +} + +// Collect relevant operations from Comparison that we may want to insert copies +// for. +void collectCmpOps(CmpInst *Comparison, SmallVectorImpl &CmpOperands) { + auto *Op0 = Comparison->getOperand(0); + auto *Op1 = Comparison->getOperand(1); + if (Op0 == Op1) + return; + CmpOperands.push_back(Comparison); + // Only want real values, not constants. Additionally, operands with one use + // are only being used in the comparison, which means they will not be useful + // for us to consider for predicateinfo. + // + // FIXME: LLVM crashes trying to create an intrinsic declaration of some + // pointer to function types that return structs, so we avoid them. + if ((isa(Op0) || isa(Op0)) && !Op0->hasOneUse() && + !(Op0->getType()->isPointerTy() && + Op0->getType()->getPointerElementType()->isFunctionTy())) + CmpOperands.push_back(Op0); + if ((isa(Op1) || isa(Op1)) && !Op1->hasOneUse() && + !(Op1->getType()->isPointerTy() && + Op1->getType()->getPointerElementType()->isFunctionTy())) + CmpOperands.push_back(Op1); +} + +// Process an assume instruction and place relevant operations we want to rename +// into OpsToRename. +void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, + SmallPtrSetImpl &OpsToRename) { + SmallVector CmpOperands; + // Second, see if we have a comparison we support + SmallVector ComparisonsToProcess; + CmpInst::Predicate Pred; + Value *Operand = II->getOperand(0); + if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()), + m_Cmp(Pred, m_Value(), m_Value())) + .match(II->getOperand(0))) { + ComparisonsToProcess.push_back( + cast(Operand)->getOperand(0)); + ComparisonsToProcess.push_back( + cast(Operand)->getOperand(1)); + } else { + ComparisonsToProcess.push_back(Operand); + } + for (auto Comparison : ComparisonsToProcess) { + if (CmpInst *Cmp = dyn_cast(Comparison)) { + collectCmpOps(Cmp, CmpOperands); + // Now add our copy infos for our operands + for (auto *Op : CmpOperands) { + OpsToRename.insert(Op); + auto &OperandInfo = getOrCreateValueInfo(Op); + PredicateBase *PB = new PredicateAssume(II, Cmp); + AllInfos.push_back(PB); + OperandInfo.Infos.push_back(PB); + } + CmpOperands.clear(); + } + } +} + +// Process a block terminating branch, and place relevant operations to be +// renamed into OpsToRename. +void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, + SmallPtrSetImpl &OpsToRename) { + SmallVector CmpOperands; + BasicBlock *FirstBB = BI->getSuccessor(0); + BasicBlock *SecondBB = BI->getSuccessor(1); + bool FirstSinglePred = FirstBB->getSinglePredecessor(); + bool SecondSinglePred = SecondBB->getSinglePredecessor(); + SmallVector SuccsToProcess; + // First make sure we have single preds for these successors, as we can't + // usefully propagate true/false info to them if there are multiple paths to + // them. + if (FirstSinglePred) + SuccsToProcess.push_back(FirstBB); + if (SecondSinglePred) + SuccsToProcess.push_back(SecondBB); + if (SuccsToProcess.empty()) + return; + // Second, see if we have a comparison we support + SmallVector ComparisonsToProcess; + CmpInst::Predicate Pred; + // We support and of comparisons because both must be true/false. Or does + // notreally tell us much useful most of the time. + if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()), + m_Cmp(Pred, m_Value(), m_Value())) + .match(BI->getCondition())) { + ComparisonsToProcess.push_back( + cast(BI->getCondition())->getOperand(0)); + ComparisonsToProcess.push_back( + cast(BI->getCondition())->getOperand(1)); + } else { + ComparisonsToProcess.push_back(BI->getCondition()); + } + for (auto Comparison : ComparisonsToProcess) { + if (CmpInst *Cmp = dyn_cast(Comparison)) { + collectCmpOps(Cmp, CmpOperands); + // Now add our copy infos for our operands + for (auto *Op : CmpOperands) { + OpsToRename.insert(Op); + auto &OperandInfo = getOrCreateValueInfo(Op); + for (auto *Succ : SuccsToProcess) { + bool TakenEdge = (Succ == FirstBB); + PredicateBase *PB = + new PredicateBranch(BranchBB, Succ, Cmp, TakenEdge); + AllInfos.push_back(PB); + OperandInfo.Infos.push_back(PB); + } + } + CmpOperands.clear(); + } + } +} + +// Build predicate info for our function +void PredicateInfo::buildPredicateInfo() { + DT.updateDFSNumbers(); + // Collect operands to rename from all conditional branch terminators, as well + // as assume statements. + SmallPtrSet OpsToRename; + for (auto DTN : depth_first(DT.getRootNode())) { + BasicBlock *BranchBB = DTN->getBlock(); + if (auto *BI = dyn_cast(BranchBB->getTerminator())) { + if (!BI->isConditional()) + continue; + processBranch(BI, BranchBB, OpsToRename); + } + } + for (auto &Assume : AC.assumptions()) { + if (auto *II = dyn_cast_or_null(Assume)) + processAssume(II, II->getParent(), OpsToRename); + } + // Now rename all our operations. + renameUses(OpsToRename); +} +Value *PredicateInfo::materializeStack(unsigned int &Counter, + ValueDFSStack &RenameStack, + const ValueDFS &VDUse, Value *OrigOp) { + // Find the first thing we have to materialize + auto RevIter = RenameStack.rbegin(); + for (; RevIter != RenameStack.rend(); ++RevIter) + if (RevIter->Def) + break; + + size_t Start = RevIter - RenameStack.rbegin(); + // The maximum number of things we should be trying to materialize at once + // right now is 4, depending on if we had an assume, a branch, and both used + // and of conditions. + for (auto RenameIter = RenameStack.end() - Start; + RenameIter != RenameStack.end(); ++RenameIter) { + auto *Op = + RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def; + ValueDFS &Result = *RenameIter; + // If the use isn't in a phi node, create a normal predicateinfo before + // the use, otherwise we have create a single argument phi to place + // before the phi node use. + auto *ValInfo = Result.PInfo; + // For branches, we can just place the operand in the split block or in a + // phi in the split block (if the use is in a phi). Otherwise, we have to + // place it right before the assume to ensure we dominate all of our uses. + + // We only have to worry about placing phi vs non phi when we are actually + // placing a predicateinfo copy in the same block as the use that got passed + // in, and not when we are materializing other parts of the stack. + if (isa(ValInfo) && + (std::tie(VDUse.DFSIn, VDUse.DFSOut) != + std::tie(RenameIter->DFSIn, RenameIter->DFSOut) || + !isa(VDUse.Use->getUser()))) { + auto *PBranch = cast(ValInfo); + // It's possible we are trying to insert multiple predicateinfos in the + // same block at the beginning of the block. When we do this, we need to + // insert them one after the other, not one before the other. To see if we + // have already inserted predicateinfo into this block, we see if Op != + // OrigOp && Op->getParent() == PBranch->SplitBB. Op must be an + // instruction we inserted if it's not the original op. + BasicBlock::iterator InsertPt; + if (Op == OrigOp || + cast(Op)->getParent() != PBranch->SplitBB) { + InsertPt = PBranch->SplitBB->begin(); + // Insert after last phi node. + while (isa(InsertPt)) + ++InsertPt; + } else { + // Insert after op. + InsertPt = ++(cast(Op)->getIterator()); + } + IRBuilder<> B(PBranch->SplitBB, InsertPt); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::predicateinfo, Op->getType()); + Value *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); + PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } else if (auto *PBranch = dyn_cast(ValInfo)) { + // It's also possible that we are trying to insert multiple predicateinfos + // at the beginning of the block here as well. We simply place the phi + // nodes in order (LLVM's phi nodes are not simultaneous). + BasicBlock::iterator InsertPt; + if (Op == OrigOp || + cast(Op)->getParent() != PBranch->SplitBB) + InsertPt = PBranch->SplitBB->begin(); + else + InsertPt = ++(cast(Op)->getIterator()); + + IRBuilder<> B(PBranch->SplitBB, InsertPt); + PHINode *PHI = B.CreatePHI(Op->getType(), 1, "pred." + Op->getName() + + "." + Twine(Counter++)); + PHI->addIncoming(Op, PBranch->BranchBB); + PredicateMap.insert({PHI, ValInfo}); + Result.Def = PHI; + } else { + auto *PAssume = dyn_cast(ValInfo); + assert(PAssume && + "Should not have gotten here without it being an assume"); + // Unlike above, this should already insert in the right order when we + // insert multiple predicateinfos in the same block. Because we are + // always inserting right before the assume (instead of the beginning of a + // block), newer insertions will end up after older ones. + IRBuilder<> B(PAssume->AssumeInst->getParent(), + PAssume->AssumeInst->getIterator()); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::predicateinfo, Op->getType()); + Value *PIC = B.CreateCall(IF, Op); + PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } + } + return RenameStack.back().Def; +} + +// Instead of the standard SSA renaming algorithm, which is O(Number of +// instructions), and walks the entire dominator tree, we walk only the defs + +// uses. The standard SSA renaming algorithm does not really rely on the +// dominator tree except to order the stack push/pops of the renaming stacks, so +// that defs end up getting pushed before hitting the correct uses. This does +// not require the dominator tree, only the *order* of the dominator tree. The +// complete and correct ordering of the defs and uses, in dominator tree is +// contained in the DFS numbering of the dominator tree. So we sort the defs and +// uses into the DFS ordering, and then just use the renaming stack as per +// normal, pushing when we hit a def (which is a predicateinfo instruction), +// popping when we are out of the dfs scope for that def, and replacing any uses +// with top of stack if it exists. In order to handle liveness without +// propagating liveness info, we don't actually insert the predicateinfo +// instruction def until we see a use that it would dominate. Once we see such +// a use, we materialize the predicateinfo instruction in the right place and +// use it. +// +// TODO: Use this algorithm to perform fast single-variable renaming in +// promotememtoreg and memoryssa. +void PredicateInfo::renameUses(SmallPtrSetImpl &OpsToRename) { + ValueDFS_Compare A(OBBMap); + // Compute liveness, and rename in O(uses) per Op. + for (auto *Op : OpsToRename) { + unsigned Counter = 0; + SmallVector OrderedUses; + const auto &ValueInfo = getValueInfo(Op); + // Insert the possible copies into the def/use list. + // They will become real copies if we find a real use for them, and never + // created otherwise. + for (auto &PossibleCopy : ValueInfo.Infos) { + ValueDFS VD; + BasicBlock *CopyBB = nullptr; + // Determine where we are going to place the copy by the copy type. + // The predicate info for branches always come first, they will get + // materialized in the split block at the top of the block. + // The predicate info for assumes will be somewhere in the middle, + // it will get materialized in front of the assume. + if (const auto *PBranch = dyn_cast(PossibleCopy)) { + CopyBB = PBranch->SplitBB; + VD.LocalNum = LN_First; + } else if (const auto *PAssume = + dyn_cast(PossibleCopy)) { + CopyBB = PAssume->AssumeInst->getParent(); + VD.LocalNum = LN_Middle; + } else + llvm_unreachable("Unhandled predicate info type"); + DomTreeNode *DomNode = DT.getNode(CopyBB); + if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.PInfo = PossibleCopy; + OrderedUses.push_back(VD); + } + + convertUsesToDFSOrdered(Op, OrderedUses); + std::sort(OrderedUses.begin(), OrderedUses.end(), A); + SmallVector RenameStack; + // For each use, sorted into dfs order, push values and replaces uses with + // top of stack, which will represent the reaching def. + for (auto &VD : OrderedUses) { + // We currently do not materialize copy over copy, but we should decide if + // we want to. + bool PossibleCopy = VD.PInfo != nullptr; + if (RenameStack.empty()) { + DEBUG(dbgs() << "Rename Stack is empty\n"); + } else { + DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" + << RenameStack.back().DFSIn << "," + << RenameStack.back().DFSOut << ")\n"); + } + + DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," + << VD.DFSOut << ")\n"); + + bool ShouldPush = (VD.Def || PossibleCopy); + bool OutOfScope = !stackIsInScope(RenameStack, VD.DFSIn, VD.DFSOut); + if (OutOfScope || ShouldPush) { + // Sync to our current scope. + popStackUntilDFSScope(RenameStack, VD.DFSIn, VD.DFSOut); + ShouldPush |= (VD.Def || PossibleCopy); + if (ShouldPush) { + RenameStack.push_back(VD); + } + } + // If we get to this point, and the stack is empty we must have a use + // with no renaming needed, just skip it. + if (RenameStack.empty()) + continue; + // Skip values, only want to rename the uses + if (VD.Def || PossibleCopy) + continue; + ValueDFS &Result = RenameStack.back(); + + // If the possible copy dominates something, materialize our stack up to + // this point. This ensures every comparison that affects our operation + // ends up with predicateinfo. + if (!Result.Def) + Result.Def = materializeStack(Counter, RenameStack, VD, Op); + + DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " + << *VD.Use->get() << " in " << *(VD.Use->getUser()) << "\n"); + assert(DT.dominates(cast(Result.Def), *VD.Use) && + "Predicateinfo def should have dominated this use"); + VD.Use->set(Result.Def); + } + } +} + +PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) { + auto OIN = ValueInfoNums.find(Operand); + if (OIN == ValueInfoNums.end()) { + // This will grow it + ValueInfos.resize(ValueInfos.size() + 1); + // This will use the new size and give us a 0 based number of the info + auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1}); + assert(InsertResult.second && "Value info number already existed?"); + return ValueInfos[InsertResult.first->second]; + } + return ValueInfos[OIN->second]; +} + +const PredicateInfo::ValueInfo & +PredicateInfo::getValueInfo(Value *Operand) const { + auto OINI = ValueInfoNums.lookup(Operand); + assert(OINI != 0 && "Operand was not really in the Value Info Numbers"); + assert(OINI < ValueInfos.size() && + "Value Info Number greater than size of Value Info Table"); + return ValueInfos[OINI]; +} + +PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, + AssumptionCache &AC) + : F(F), DT(DT), AC(AC) { + // Push an empty operand info so that we can detect 0 as not finding one + ValueInfos.resize(1); + buildPredicateInfo(); +} +PredicateInfo::~PredicateInfo() { + for (auto KV : OBBMap) + delete KV.second; +} + +void PredicateInfo::verifyPredicateInfo() const {} + +char PredicateInfoPrinterLegacyPass::ID = 0; + +PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass() + : FunctionPass(ID) { + initializePredicateInfoPrinterLegacyPassPass( + *PassRegistry::getPassRegistry()); +} + +void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addPreserved(); +} + +bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { + auto &PredInfo = getAnalysis().getPredInfo(); + PredInfo.print(dbgs()); + if (VerifyPredicateInfo) + PredInfo.verifyPredicateInfo(); + return false; +} + +AnalysisKey PredicateInfoAnalysis::Key; + +PredicateInfoAnalysis::Result +PredicateInfoAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); + return PredicateInfoAnalysis::Result(make_unique(F, DT, AC)); +} + +PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "PredicateInfo for function: " << F.getName() << "\n"; + AM.getResult(F).getPredInfo().print(OS); + + return PreservedAnalyses::all(); +} + +/// \brief An assembly annotator class to print PredicateInfo information in +/// comments. +class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { + friend class PredicateInfo; + const PredicateInfo *PredInfo; + +public: + PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) {} + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (const auto *PI = PredInfo->getPredicateInfoFor(I)) { + OS << "; Has predicate info\n"; + if (const auto *PB = dyn_cast(PI)) + OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge + << " Comparison:" << *PB->Comparison << " }\n"; + else if (const auto *PA = dyn_cast(PI)) + OS << "; assume predicate info {" + << " Comparison:" << *PA->Comparison << " }\n"; + } + } +}; + +void PredicateInfo::print(raw_ostream &OS) const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void PredicateInfo::dump() const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + AM.getResult(F).getPredInfo().verifyPredicateInfo(); + + return PreservedAnalyses::all(); +} + +char PredicateInfoWrapperPass::ID = 0; + +PredicateInfoWrapperPass::PredicateInfoWrapperPass() : FunctionPass(ID) { + initializePredicateInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +void PredicateInfoWrapperPass::releaseMemory() { PredInfo.reset(); } + +void PredicateInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequired(); +} + +bool PredicateInfoWrapperPass::runOnFunction(Function &F) { + auto &DT = getAnalysis().getDomTree(); + auto &AC = getAnalysis().getAssumptionCache(F); + PredInfo.reset(new PredicateInfo(F, DT, AC)); + return true; +} + +void PredicateInfoWrapperPass::verifyAnalysis() const { + PredInfo->verifyPredicateInfo(); +} + +void PredicateInfoWrapperPass::print(raw_ostream &OS, const Module *M) const { + PredInfo->print(OS); +} +} Index: lib/Transforms/Utils/Utils.cpp =================================================================== --- lib/Transforms/Utils/Utils.cpp +++ lib/Transforms/Utils/Utils.cpp @@ -38,6 +38,8 @@ initializeMemorySSAWrapperPassPass(Registry); initializeMemorySSAPrinterLegacyPassPass(Registry); initializeStripGCRelocatesPass(Registry); + initializePredicateInfoWrapperPassPass(Registry); + initializePredicateInfoPrinterLegacyPassPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. Index: test/Transforms/Util/PredicateInfo/condprop.ll =================================================================== --- /dev/null +++ test/Transforms/Util/PredicateInfo/condprop.ll @@ -0,0 +1,476 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -print-predicateinfo -S | FileCheck %s + +@a = external global i32 ; [#uses=7] + +define i32 @test1() nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB:%.*]], label [[BB1:%.*]] +; CHECK: bb: +; CHECK-NEXT: br label [[BB8:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 5 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 4 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 5 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 5 +; CHECK-NEXT: br i1 [[TMP9]], label [[BB6:%.*]], label [[BB7:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb7: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb8: +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP12]], [[BB7]] ], [ [[TMP11]], [[BB6]] ], [ [[TMP7]], [[BB4]] ], [ 4, [[BB2]] ], [ 5, [[BB]] ] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: return: +; CHECK-NEXT: ret i32 [[DOT0]] +; +entry: + %0 = load i32, i32* @a, align 4 + %1 = icmp eq i32 %0, 4 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + br label %bb8 + +bb1: ; preds = %entry + %2 = load i32, i32* @a, align 4 + %3 = icmp eq i32 %2, 5 + br i1 %3, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb8 + +bb3: ; preds = %bb1 + %4 = load i32, i32* @a, align 4 + %5 = icmp eq i32 %4, 4 + br i1 %5, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %6 = load i32, i32* @a, align 4 + %7 = add i32 %6, 5 + br label %bb8 + +bb5: ; preds = %bb3 + %8 = load i32, i32* @a, align 4 + %9 = icmp eq i32 %8, 5 + br i1 %9, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + %10 = load i32, i32* @a, align 4 + %11 = add i32 %10, 4 + br label %bb8 + +bb7: ; preds = %bb5 + %12 = load i32, i32* @a, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb + %.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ] + br label %return + +return: ; preds = %bb8 + ret i32 %.0 +} + +declare void @foo(i1) +declare void @bar(i32) + +; CHECK-LABEL: @test3( +define void @test3(i32 %x, i32 %y) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]] +; CHECK: both_zero: +; CHECK-NEXT: [[Y_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: [[YZ_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[YZ]]) +; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK-NEXT: [[XZ_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[XZ]]) +; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) +; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: call void @bar(i32 [[Y_0]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both_zero, label %nope +both_zero: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i1 %b, i32 %x) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]] +; CHECK: sw: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE0:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE0]] +; CHECK-NEXT: i32 3, label [[CASE3]] +; CHECK-NEXT: i32 4, label [[DEFAULT]] +; CHECK-NEXT: ] +; CHECK: default: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; CHECK: case0: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; CHECK: case1: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; CHECK: case3: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; + br i1 %b, label %sw, label %case3 +sw: + switch i32 %x, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case0 + i32 3, label %case3 + i32 4, label %default + ] +default: + call void @bar(i32 %x) + ret void +case0: + call void @bar(i32 %x) + ret void +case1: + call void @bar(i32 %x) + ret void +case3: + call void @bar(i32 %x) + ret void +} + +; CHECK-LABEL: @test5( +define i1 @test5(i32 %x, i32 %y) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[Y_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[Y_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: [[X_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp ne i32 %x, %y + ret i1 %cmp2 + +different: + %cmp3 = icmp eq i32 %x, %y + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6( +define i1 @test6(i32 %x, i32 %y) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = icmp ne i32 %x, %y + %cmp = icmp eq i32 %x, %y + %cmp3 = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6_fp( +define i1 @test6_fp(float %x, float %y) { +; CHECK-LABEL: @test6_fp( +; CHECK-NEXT: [[CMP2:%.*]] = fcmp une float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = fcmp oeq float [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = fcmp une float %x, %y + %cmp = fcmp oeq float %x, %y + %cmp3 = fcmp oeq float %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7( +define i1 @test7(i32 %x, i32 %y) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[Y_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[Y_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: [[X_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp sle i32 %x, %y + ret i1 %cmp2 + +different: + %cmp3 = icmp sgt i32 %x, %y + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7_fp( +define i1 @test7_fp(float %x, float %y) { +; CHECK-LABEL: @test7_fp( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[Y_0:%.*]] = call float @llvm.predicateinfo.f32(float [[Y]]) +; CHECK-NEXT: [[X_0:%.*]] = call float @llvm.predicateinfo.f32(float [[X]]) +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[Y_1:%.*]] = call float @llvm.predicateinfo.f32(float [[Y]]) +; CHECK-NEXT: [[X_1:%.*]] = call float @llvm.predicateinfo.f32(float [[X]]) +; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = fcmp ule float %x, %y + ret i1 %cmp2 + +different: + %cmp3 = fcmp ogt float %x, %y + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8( +define i1 @test8(i32 %x, i32 %y) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = icmp sle i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %cmp3 = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8_fp( +define i1 @test8_fp(float %x, float %y) { +; CHECK-LABEL: @test8_fp( +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = fcmp ule float %x, %y + %cmp = fcmp ogt float %x, %y + %cmp3 = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +; PR1768 +; CHECK-LABEL: @test9( +define i32 @test9(i32 %i, i32 %j) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[J_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[J]]) +; CHECK-NEXT: [[I_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[I]]) +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] +; CHECK-NEXT: ret i32 [[DIFF]] +; CHECK: ret: +; CHECK-NEXT: ret i32 5 +; + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff + +ret: + ret i32 5 +} + +; PR1768 +; CHECK-LABEL: @test10( +define i32 @test10(i32 %j, i32 %i) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[J_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[J]]) +; CHECK-NEXT: [[I_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[I]]) +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] +; CHECK-NEXT: ret i32 [[DIFF]] +; CHECK: ret: +; CHECK-NEXT: ret i32 5 +; + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff + +ret: + ret i32 5 +} + +declare i32 @yogibar() + +; CHECK-LABEL: @test11( +define i32 @test11(i32 %x) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[V0:%.*]] = call i32 @yogibar() +; CHECK-NEXT: [[V1:%.*]] = call i32 @yogibar() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[V1_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[V1]]) +; CHECK-NEXT: ret i32 [[V1_0]] +; CHECK: next: +; CHECK-NEXT: [[V0_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[V0]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]] +; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]] +; CHECK: cond_true2: +; CHECK-NEXT: [[V0_0_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[V0_0]]) +; CHECK-NEXT: ret i32 [[V0_0_1]] +; CHECK: next2: +; CHECK-NEXT: ret i32 0 +; + %v0 = call i32 @yogibar() + %v1 = call i32 @yogibar() + %cmp = icmp eq i32 %v0, %v1 + br i1 %cmp, label %cond_true, label %next + +cond_true: + ret i32 %v1 + +next: + %cmp2 = icmp eq i32 %x, %v0 + br i1 %cmp2, label %cond_true2, label %next2 + +cond_true2: + ret i32 %v0 + +next2: + ret i32 0 +} + +; CHECK-LABEL: @test12( +define i32 @test12(i32 %x) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: br label [[RET:%.*]] +; CHECK: cond_false: +; CHECK-NEXT: br label [[RET]] +; CHECK: ret: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X]], [[COND_TRUE]] ], [ [[X]], [[COND_FALSE]] ] +; CHECK-NEXT: ret i32 [[RES]] +; + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond_true, label %cond_false + +cond_true: + br label %ret + +cond_false: + br label %ret + +ret: + %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ] + ret i32 %res +} Index: test/Transforms/Util/PredicateInfo/testand.ll =================================================================== --- /dev/null +++ test/Transforms/Util/PredicateInfo/testand.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s + +declare void @foo(i1) +declare void @bar(i32) +declare void @llvm.assume(i1) + +define void @testand(i32 %x, i32 %y) { +; CHECK-LABEL: @testand( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK: [[Y_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK: [[YZ_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[YZ]]) +; CHECK: [[X_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK: [[XZ_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[XZ]]) +; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) +; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: call void @bar(i32 [[Y_0]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +} +define void @testandsame(i32 %x, i32 %y) { +; CHECK-LABEL: @testandsame( +; CHECK-NEXT: [[XGT:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[XLT:%.*]] = icmp slt i32 [[X]], 100 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XGT]], [[XLT]] +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK: [[XLT_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[XLT]]) +; CHECK: [[X_0:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK: [[X_0_1:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X_0]]) +; CHECK: [[XGT_0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[XGT]]) +; CHECK-NEXT: call void @foo(i1 [[XGT_0]]) +; CHECK-NEXT: call void @foo(i1 [[XLT_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0_1]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z]]) +; CHECK-NEXT: ret void +; + %xgt = icmp sgt i32 %x, 0 + %xlt = icmp slt i32 %x, 100 + %z = and i1 %xgt, %xlt + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xgt) + call void @foo(i1 %xlt) + call void @bar(i32 %x) + ret void +nope: + call void @foo(i1 %z) + ret void +} + + + + +define void @testandassume(i32 %x, i32 %y) { +; CHECK-LABEL: @testandassume( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK: [[TMP1:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[XZ]]) +; CHECK: [[TMP2:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[X]]) +; CHECK: [[TMP3:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[YZ]]) +; CHECK: [[TMP4:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[Y]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK: [[DOT03:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[TMP4]]) +; CHECK: [[DOT02:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[TMP3]]) +; CHECK: [[DOT01:%.*]] = call i32 @llvm.predicateinfo.i32(i32 [[TMP2]]) +; CHECK: [[DOT0:%.*]] = call i1 @llvm.predicateinfo.i1(i1 [[TMP1]]) +; CHECK-NEXT: call void @foo(i1 [[DOT0]]) +; CHECK-NEXT: call void @foo(i1 [[DOT02]]) +; CHECK-NEXT: call void @bar(i32 [[DOT01]]) +; CHECK-NEXT: call void @bar(i32 [[DOT03]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + call void @llvm.assume(i1 %z) + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +}