Index: include/llvm/IR/GlobalVariable.h
===================================================================
--- include/llvm/IR/GlobalVariable.h
+++ include/llvm/IR/GlobalVariable.h
@@ -31,6 +31,7 @@
 class Constant;
 template <typename ValueSubClass> class SymbolTableListTraits;
+class IntrinsicInst;
 
 class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> {
   friend class SymbolTableListTraits<GlobalVariable>;
@@ -45,6 +46,10 @@
                                                // can change from its initial
                                                // value before global
                                                // initializers are run?
+  IntrinsicInst *InvariantStartInst; // Transient.
+
+  void checkInvariantStartInstruction(IntrinsicInst *II);
+
 public:
   // allocate space for exactly one operand
   void *operator new(size_t s) {
@@ -144,6 +149,14 @@
   bool isConstant() const { return isConstantGlobal; }
   void setConstant(bool Val) { isConstantGlobal = Val; }
 
+  IntrinsicInst *getInvariantStartInstruction() const {
+    return InvariantStartInst;
+  }
+  void setInvariantStartInstruction(IntrinsicInst *II) {
+    checkInvariantStartInstruction(II);
+    InvariantStartInst = II;
+  }
+
   bool isExternallyInitialized() const {
     return isExternallyInitializedConstant;
   }
Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -34,6 +34,7 @@
 class ConstantRange;
 class DataLayout;
 class LLVMContext;
+class IntrinsicInst;
 
 enum AtomicOrdering {
   NotAtomic = 0,
@@ -75,6 +76,9 @@
 ///
 class AllocaInst : public UnaryInstruction {
   Type *AllocatedType;
+  IntrinsicInst *InvariantStartInst; // Transient.
+
+  void checkInvariantStartInstruction(IntrinsicInst *II);
 
 protected:
   // Note: Instruction needs to be a friend here to call cloneImpl.
@@ -132,6 +136,14 @@
   }
   void setAlignment(unsigned Align);
 
+  IntrinsicInst *getInvariantStartInstruction() const {
+    return InvariantStartInst;
+  }
+  void setInvariantStartInstruction(IntrinsicInst *II) {
+    checkInvariantStartInstruction(II);
+    InvariantStartInst = II;
+  }
+
   /// isStaticAlloca - Return true if this alloca is in the entry block of the
   /// function and is a constant size. If so, the code generator will fold it
   /// into the prolog/epilog code, so it is basically free.
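
The new transient marker gives analyses O(1) access to the dominating
invariant_start for an alloca or a global variable. A minimal sketch (not part
of the patch; pointsToWriteonceReadonly is a hypothetical helper) of how a
client would consult the two accessors added above:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Returns true if the underlying object of Ptr is known readonly: either
    // it is a constant global, or an invariant_start has been recorded on it
    // (i.e. its one-and-only write has already happened).
    static bool pointsToWriteonceReadonly(const Value *Ptr) {
      const Value *Obj = Ptr->stripPointerCasts();
      if (const AllocaInst *AI = dyn_cast<AllocaInst>(Obj))
        return AI->getInvariantStartInstruction() != nullptr;
      if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Obj))
        return GV->isConstant() || GV->getInvariantStartInstruction() != nullptr;
      return false;
    }
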
@@ -4749,6 +4761,41 @@
   }
 };
 
+//===----------------------------------------------------------------------===//
+// Processing invariant_start/end intrinsics
+//===----------------------------------------------------------------------===//
+
+IntrinsicInst *getInvariantStartInstruction(const Value *Arg);
+void setInvariantStartInstruction(Value *Arg, IntrinsicInst *Val);
+bool processInvariantIntrinsics(IntrinsicInst *II);
+
+struct PreservedInvariantInfo {
+  IntrinsicInst *II;
+  Value *Load;
+  PreservedInvariantInfo() : II(nullptr), Load(nullptr) {}
+};
+
+class PreserveInvariantInfo {
+  IntrinsicInst *PreservedII;
+  Value *LoadI;
+  void CheckPreservedInfo();
+public:
+  PreserveInvariantInfo(IntrinsicInst *II, Value *LI)
+      : PreservedII(II), LoadI(LI) {
+    if (PreservedII)
+      CheckPreservedInfo();
+  }
+  ~PreserveInvariantInfo() {
+    if (PreservedII)
+      setInvariantStartInstruction(LoadI, PreservedII);
+  }
+};
+
+void setPreservedInvariantInfo(PreservedInvariantInfo &Preserved,
+                               BasicBlock::iterator &ScanBackwardFrom,
+                               Value *Query, BasicBlock *BB,
+                               bool SkipToInvariantStart = false);
+
 } // End llvm namespace
 
 #endif
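
PreserveInvariantInfo is an RAII guard: a backward scan temporarily clears the
marking (so queries stop treating the location as constant memory behind the
invariant_start), and the destructor re-attaches it. A sketch of the intended
call pattern, mirroring the Loads.cpp and MemoryDependenceAnalysis.cpp hunks
below (scanBlockBackward is hypothetical):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void scanBlockBackward(BasicBlock::iterator ScanFrom, Value *Query,
                                  BasicBlock *BB) {
      // Compute what to preserve; optionally jump straight to the
      // invariant_start and clear the marking for the duration of the scan.
      PreservedInvariantInfo Preserved;
      setPreservedInvariantInfo(Preserved, ScanFrom, Query, BB,
                                /*SkipToInvariantStart=*/true);
      PreserveInvariantInfo PIO(Preserved.II, Preserved.Load);

      while (ScanFrom != BB->begin()) {
        Instruction *Inst = &*--ScanFrom;
        // ... inspect Inst; queries here no longer see the load as
        // pointing to constant memory ...
        (void)Inst;
      }
    } // ~PreserveInvariantInfo restores the invariant_start marking here.
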
Index: lib/Analysis/AliasAnalysis.cpp
===================================================================
--- lib/Analysis/AliasAnalysis.cpp
+++ lib/Analysis/AliasAnalysis.cpp
@@ -220,6 +220,12 @@
     // If the pointer is a pointer to constant memory, then it could not have
     // been modified by this store.
+    // NOTE: 'writeonce' memory is never stored into after it has been marked
+    // invariant; the store to a given address always precedes the
+    // invariant_start on it. So, if this store writes to the address, assume
+    // the memory is not (yet) constant.
+    // FIXME: Handle this better, in combination with
+    // processInvariantIntrinsics(), for GVN to skip straight to the store
+    // during memory dependence analysis.
     if (pointsToConstantMemory(Loc))
       return MRI_NoModRef;
   }
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -469,7 +469,8 @@
 /// Returns whether the given pointer value points to memory that is local to
 /// the function, with global constants being considered local to all
-/// functions.
+/// functions. Initialized global writeonces are also considered local to
+/// all functions.
 bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
                                            bool OrLocal) {
   assert(Visited.empty() && "Visited must be cleared after use!");
@@ -488,12 +489,16 @@
     if (OrLocal && isa<AllocaInst>(V))
       continue;
 
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+      if (AI->getInvariantStartInstruction())
+        continue;
+
     // A global constant counts as local memory for our purposes.
     if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
       // Note: this doesn't require GV to be "ODR" because it isn't legal for a
       // global to be marked constant in some modules and non-constant in
       // others. GV may even be a declaration, not a definition.
-      if (!GV->isConstant()) {
+      if (!GV->isConstant() && !GV->getInvariantStartInstruction()) {
         Visited.clear();
         return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
       }
@@ -627,6 +632,12 @@
   return false;
 }
 
+static bool isInvariantIntrinsic(ImmutableCallSite CS) {
+  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+  return II && (II->getIntrinsicID() == Intrinsic::invariant_start ||
+                II->getIntrinsicID() == Intrinsic::invariant_end);
+}
+
 /// Checks to see if the specified callsite can clobber the specified memory
 /// object.
 ///
@@ -688,6 +699,10 @@
   if (isAssumeIntrinsic(CS))
     return MRI_NoModRef;
 
+  // *.invariant.* intrinsics follow the same pattern as the assume intrinsic.
+  if (isInvariantIntrinsic(CS))
+    return MRI_NoModRef;
+
   // The AAResultBase base class has some smarts, lets use them.
   return AAResultBase::getModRefInfo(CS, Loc);
 }
@@ -700,6 +715,10 @@
   if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
     return MRI_NoModRef;
 
+  // *.invariant.* intrinsics follow the same pattern as the assume intrinsic.
+  if (isInvariantIntrinsic(CS1) || isInvariantIntrinsic(CS2))
+    return MRI_NoModRef;
+
   // The AAResultBase base class has some smarts, lets use them.
   return AAResultBase::getModRefInfo(CS1, CS2);
 }
Index: lib/Analysis/Loads.cpp
===================================================================
--- lib/Analysis/Loads.cpp
+++ lib/Analysis/Loads.cpp
@@ -167,8 +167,10 @@
 /// threading in part by eliminating partially redundant loads.
 /// At that point, the value of MaxInstsToScan was already set to '6'
 /// without documented explanation.
+/// We have bumped this number up to '8' to improve the chances of a
+/// behavioral match when -instcombine is run after or without -inline.
 cl::opt<unsigned>
-llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(8), cl::Hidden,
   cl::desc("Use this to specify the default maximum number of instructions "
            "to scan backward from a given instruction, when searching for "
            "available loaded value"));
@@ -208,6 +210,15 @@
   Value *StrippedPtr = Ptr->stripPointerCasts();
 
+  // We're about to scan backwards. Preserve the initial invariant_start
+  // intrinsic marking on this load, for subsequent instructions.
+  // First, compute the info to preserve (and do not skip any instruction).
+  // Then, actually preserve the info before the backward scan starts.
+  PreservedInvariantInfo Preserved;
+  setPreservedInvariantInfo(Preserved, ScanFrom, StrippedPtr, ScanBB);
+  PreserveInvariantInfo PIO(Preserved.II, Preserved.Load);
+
   while (ScanFrom != ScanBB->begin()) {
     // We must ignore debug info directives when counting (otherwise they
     // would affect codegen).
@@ -215,6 +226,18 @@
     if (isa<DbgInfoIntrinsic>(Inst))
       continue;
 
+    // Same for invariant intrinsics.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+        if (II == Preserved.II)
+          // We did not skip any instruction earlier, so we must express that
+          // the given load no longer points to constant memory.
+          llvm::setInvariantStartInstruction(Preserved.Load, nullptr);
+        continue;
+      } else if (II->getIntrinsicID() == Intrinsic::invariant_end)
+        continue;
+    }
+
     // Restore ScanFrom to expected value in case next test succeeds
     ScanFrom++;
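
Both backward scans treat the invariant intrinsics like debug intrinsics: they
touch no memory, so they are free to skip. For clarity, the test could be
factored as below (a sketch only; the patch inlines the checks instead of
introducing this helper):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // True for instructions a load-availability scan may step over without
    // affecting the result: debug info and invariant_start/invariant_end.
    static bool isSkippableIntrinsic(const Instruction *I) {
      if (isa<DbgInfoIntrinsic>(I))
        return true;
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return II->getIntrinsicID() == Intrinsic::invariant_start ||
               II->getIntrinsicID() == Intrinsic::invariant_end;
      return false;
    }
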
Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -423,6 +423,17 @@
     isInvariantLoad = true;
   }
 
+  // We're about to scan backwards. Preserve the initial invariant_start
+  // intrinsic marking on this load, for subsequent instructions.
+  // First, compute the info to preserve and prepare to skip instructions
+  // that need no further processing.
+  // Then, actually preserve the info before the backward scan starts.
+  PreservedInvariantInfo Preserved;
+  if (isLoad && QueryInst)
+    setPreservedInvariantInfo(Preserved, ScanIt, QueryInst, BB,
+                              /*SkipToInvariantStart=*/true);
+  PreserveInvariantInfo PIO(Preserved.II, Preserved.Load);
+
   const DataLayout &DL = BB->getModule()->getDataLayout();
 
   // Create a numbered basic block to lazily compute and cache instruction
Index: lib/IR/Globals.cpp
===================================================================
--- lib/IR/Globals.cpp
+++ lib/IR/Globals.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -143,6 +144,11 @@
 // GlobalVariable Implementation
 //===----------------------------------------------------------------------===//
 
+void GlobalVariable::checkInvariantStartInstruction(IntrinsicInst *II) {
+  assert((!II || II->getIntrinsicID() == Intrinsic::invariant_start) &&
+         "Given intrinsic instruction is not invariant_start");
+}
+
 GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
                                Constant *InitVal, const Twine &Name,
                                ThreadLocalMode TLMode, unsigned AddressSpace,
@@ -151,7 +157,8 @@
                    OperandTraits<GlobalVariable>::op_begin(this),
                    InitVal != nullptr, Link, Name, AddressSpace),
       isConstantGlobal(constant),
-      isExternallyInitializedConstant(isExternallyInitialized) {
+      isExternallyInitializedConstant(isExternallyInitialized),
+      InvariantStartInst(nullptr) {
   setThreadLocalMode(TLMode);
   if (InitVal) {
     assert(InitVal->getType() == Ty &&
@@ -169,7 +176,8 @@
                    OperandTraits<GlobalVariable>::op_begin(this),
                    InitVal != nullptr, Link, Name, AddressSpace),
       isConstantGlobal(constant),
-      isExternallyInitializedConstant(isExternallyInitialized) {
+      isExternallyInitializedConstant(isExternallyInitialized),
+      InvariantStartInst(nullptr) {
   setThreadLocalMode(TLMode);
   if (InitVal) {
     assert(InitVal->getType() == Ty &&
Index: lib/IR/Instructions.cpp
===================================================================
--- lib/IR/Instructions.cpp
+++ lib/IR/Instructions.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1133,6 +1134,11 @@
 // AllocaInst Implementation
 //===----------------------------------------------------------------------===//
 
+void AllocaInst::checkInvariantStartInstruction(IntrinsicInst *II) {
+  assert((!II || II->getIntrinsicID() == Intrinsic::invariant_start) &&
+         "Given intrinsic instruction is not invariant_start");
+}
+
 static Value *getAISize(LLVMContext &Context, Value *Amt) {
   if (!Amt)
     Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
@@ -1163,7 +1169,7 @@
                        const Twine &Name, Instruction *InsertBefore)
     : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
                        getAISize(Ty->getContext(), ArraySize), InsertBefore),
-      AllocatedType(Ty) {
+      AllocatedType(Ty), InvariantStartInst(nullptr) {
   setAlignment(Align);
   assert(!Ty->isVoidTy() && "Cannot allocate void!");
   setName(Name);
@@ -1173,7 +1179,7 @@
                        const Twine &Name, BasicBlock *InsertAtEnd)
     : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
                        getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
-      AllocatedType(Ty) {
+      AllocatedType(Ty), InvariantStartInst(nullptr) {
   setAlignment(Align);
   assert(!Ty->isVoidTy() && "Cannot allocate void!");
   setName(Name);
@@ -3989,3 +3995,82 @@
   LLVMContext &Context = getContext();
   return new UnreachableInst(Context);
 }
+
+//===----------------------------------------------------------------------===//
+// Processing invariant_start/end intrinsics
+//===----------------------------------------------------------------------===//
+
+void llvm::PreserveInvariantInfo::CheckPreservedInfo() {
+  assert(PreservedII->getIntrinsicID() == Intrinsic::invariant_start &&
+         "Preserved instruction must be an invariant_start intrinsic");
+  assert(LoadI && "Can't preserve an intrinsic instruction for no load.");
+}
+
+/// If the given Query is a load from writeonce readonly memory, we can speed
+/// up the backward scan by jumping to the associated invariant_start
+/// instruction, *if* that instruction is in the block BB.
+/// Then, since we would be scanning backward, undo the invariant_start
+/// intrinsic marking so that getModRefInfo() knows that the load does not
+/// point to constant memory from this point on.
+void llvm::setPreservedInvariantInfo(PreservedInvariantInfo &Preserved,
+                                     BasicBlock::iterator &ScanBackwardFrom,
+                                     Value *Query, BasicBlock *BB,
+                                     bool SkipToInvariantStart) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(Query)) {
+    Value *I = LI->getPointerOperand()->stripPointerCasts();
+    if (IntrinsicInst *II = llvm::getInvariantStartInstruction(I)) {
+      // NOTE: II's parent block can only either be the same as or contain
+      // ScanBackwardFrom's parent block (i.e. BB). When not the same, set
+      // ScanBackwardFrom to the beginning of BB to skip the backward
+      // traversal.
+      if (II->getParent() == ScanBackwardFrom->getParent()) {
+        // If there is anything to scan backward at all, then we must still
+        // be processing BB, and this load no longer points to constant
+        // memory. Record the info to preserve before resetting it.
+        Preserved.II = II;
+        Preserved.Load = I;
+        if (SkipToInvariantStart) {
+          ScanBackwardFrom = II;
+          llvm::setInvariantStartInstruction(I, nullptr);
+        }
+      } else if (SkipToInvariantStart)
+        ScanBackwardFrom = BB->begin();
+    }
+  }
+}
+
+IntrinsicInst *llvm::getInvariantStartInstruction(const Value *Arg) {
+  if (!Arg)
+    return nullptr;
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Arg))
+    return AI->getInvariantStartInstruction();
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
+    if (!GV->isConstant())
+      return GV->getInvariantStartInstruction();
+  return nullptr;
+}
+
+void llvm::setInvariantStartInstruction(Value *Arg, IntrinsicInst *Val) {
+  if (!Arg)
+    return;
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Arg))
+    GV->setInvariantStartInstruction(Val);
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Arg))
+    AI->setInvariantStartInstruction(Val);
+}
+
+// Process @llvm.invariant.start/end intrinsics.
+bool llvm::processInvariantIntrinsics(IntrinsicInst *II) {
+  assert(II && "Can't mark a null instruction.");
+
+  if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+    Value *Addr = II->getArgOperand(1)->stripPointerCasts();
+    setInvariantStartInstruction(Addr, II);
+    return true;
+  }
+  if (II->getIntrinsicID() == Intrinsic::invariant_end) {
+    Value *Addr = II->getArgOperand(2)->stripPointerCasts();
+    if (getInvariantStartInstruction(Addr))
+      setInvariantStartInstruction(Addr, nullptr);
+    return true;
+  }
+  return false;
+}
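
The helper returns true exactly when it consumed one of the pair, which lets
callers fold it into their per-instruction dispatch. A sketch of the calling
convention that the Inliner/EarlyCSE/GVN hunks below follow (the
processInstruction wrapper is hypothetical; the helpers are the ones defined
above):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Recall the LangRef signatures: invariant_start(i64 %size, i8* %ptr)
    // returns a token consumed by invariant_end({}* %start, i64 %size,
    // i8* %ptr) -- hence the pointer is operand 1 of the former and
    // operand 2 of the latter, as used in processInvariantIntrinsics().
    static bool processInstruction(Instruction *I) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        if (processInvariantIntrinsics(II))
          return false; // Marking recorded; nothing further to do here.
      // ... normal per-instruction work ...
      return true;
    }
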
Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -2191,10 +2191,13 @@
   /// control flows into, or null upon return.
   bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
 
-  Constant *getVal(Value *V) {
+  Constant *getVal(Value *V, bool CheckComputed = true) {
     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
     Constant *R = ValueStack.back().lookup(V);
-    assert(R && "Reference to an uncomputed value!");
+
+    // Allow references to uncomputed values from processInvariantIntrinsics().
+    if (CheckComputed)
+      assert(R && "Reference to an uncomputed value!");
     return R;
   }
@@ -2210,6 +2213,10 @@
     return Invariants;
   }
 
+  const SmallPtrSetImpl<GlobalVariable*> &getReadOnlys() const {
+    return ReadOnlys;
+  }
+
 private:
   Constant *ComputeLoadResult(Constant *P);
@@ -2237,6 +2244,10 @@
   /// static constructor.
   SmallPtrSet<GlobalVariable*, 8> Invariants;
 
+  /// ReadOnlys - These global variables are writeonce variables that
+  /// have been marked readonly by the static constructor.
+  SmallPtrSet<GlobalVariable*, 8> ReadOnlys;
+
   /// SimpleConstants - These are constants we have checked and know to be
   /// simple enough to live in a static initializer of a global.
   SmallPtrSet<Constant*, 8> SimpleConstants;
@@ -2494,8 +2505,13 @@
             Size->getValue().getLimitedValue() >=
                 DL.getTypeStoreSize(ElemTy)) {
           Invariants.insert(GV);
-          DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
-                       << "\n");
+          DEBUG(dbgs() << "Found a global var that is an invariant (constant): "
+                       << *GV << "\n");
+        } else if (GV->getInvariantStartInstruction()) {
+          ReadOnlys.insert(GV);
+          DEBUG(dbgs() << "Found a global var that is a readonly writeonce: "
+                       << *GV << "\n");
         } else {
           DEBUG(dbgs() << "Found a global var, but can not treat it as an "
                           "invariant.\n");
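
For context: the kind of source pattern these 'writeonce' globals correspond
to (an assumption for illustration only -- this patch touches the middle end,
and relies on a frontend or earlier pass to emit the intrinsics):

    // A table written exactly once during static construction and readonly
    // afterwards. After the initializing loop, the emitter would place an
    // llvm.invariant.start covering sizeof(Table) bytes of Table, which the
    // evaluator above then records in the ReadOnlys set.
    static int Table[256];

    __attribute__((constructor)) static void initTable() {
      for (int I = 0; I < 256; ++I)
        Table[I] = 2 * I + 1; // some one-time computation
    }

Since the initializing stores usually happen in callees of the registered
constructor, the tracer below walks the call chain instead of waiting for
those calls to be inlined.
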
@@ -2619,6 +2635,84 @@
   }
 }
 
+static void processInvariantIntrinsics(Evaluator &Eval, Function *F) {
+
+  // Scan the blocks to process invariant intrinsics, tracing whatever
+  // call chains can be traced.
+  // Without this, invariant intrinsics on global variables can only be
+  // processed when the constructor calls are inlined.
+  // TODO: Instead of forcing this tracing, can we rely on -O1's always_inline
+  // or -O2's inline?
+  BasicBlock *BB = F->begin();
+  while (BB) {
+    BasicBlock *NextBB = nullptr;
+    BasicBlock::iterator CurInst = BB->begin();
+
+    while (CurInst) {
+
+      if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+        CallSite CS(CurInst);
+
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CurInst))
+          processInvariantIntrinsics(II);
+
+        // Debug info, inline asm, intrinsics, ...
+        // can safely be ignored here.
+        if (isa<DbgInfoIntrinsic>(CS.getInstruction()) ||
+            isa<InlineAsm>(CS.getCalledValue()) ||
+            isa<IntrinsicInst>(CS.getInstruction())) {
+          ++CurInst;
+          continue;
+        }
+
+        Function *Callee =
+            dyn_cast_or_null<Function>(Eval.getVal(CS.getCalledValue(),
+                                                   /*CheckComputed=*/false));
+        if (!Callee || Callee->mayBeOverridden())
+          break;
+
+        if (!Callee->isDeclaration() &&
+            !Callee->getFunctionType()->isVarArg())
+          processInvariantIntrinsics(Eval, Callee);
+      } else if (isa<TerminatorInst>(CurInst)) {
+        if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
+          if (BI->isUnconditional())
+            NextBB = BI->getSuccessor(0);
+          else if (ConstantInt *Cond =
+                       dyn_cast_or_null<ConstantInt>(
+                           Eval.getVal(BI->getCondition(),
+                                       /*CheckComputed=*/false)))
+            NextBB = BI->getSuccessor(!Cond->getZExtValue());
+        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
+          if (ConstantInt *Val =
+                  dyn_cast_or_null<ConstantInt>(
+                      Eval.getVal(SI->getCondition(), /*CheckComputed=*/false)))
+            NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+        } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
+          Value *Val = Eval.getVal(IBI->getAddress(),
+                                   /*CheckComputed=*/false)->stripPointerCasts();
+          if (BlockAddress *BA = dyn_cast_or_null<BlockAddress>(Val))
+            NextBB = BA->getBasicBlock();
+          else
+            NextBB = nullptr;
+        } else if (isa<ReturnInst>(CurInst))
+          NextBB = nullptr;
+        break;
+      }
+
+      if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+        NextBB = II->getNormalDest();
+        break;
+      }
+
+      ++CurInst;
+    }
+
+    BB = NextBB;
+  }
+}
+
 /// EvaluateFunction - Evaluate a call to function F, returning true if
 /// successful, false if we can't evaluate it.  ActualArgs contains the formal
 /// arguments for the function.
@@ -2647,6 +2741,10 @@
 
   BasicBlock::iterator CurInst = CurBB->begin();
 
+  // Scan the blocks to process invariant intrinsics.
+  // This will mark 'writeonce' global variables as written.
+  processInvariantIntrinsics(*this, F);
+
   while (1) {
     BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
 
     DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
@@ -2706,6 +2804,11 @@
       CommitValueTo(I->second, I->first);
     for (GlobalVariable *GV : Eval.getInvariants())
       GV->setConstant(true);
+
+    for (GlobalVariable *GV : Eval.getReadOnlys()) {
+      assert(GV->getInvariantStartInstruction() &&
+             "Only readonly writeonce global vars are allowed here.");
+    }
   }
 
   return EvalSuccess;
Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -470,15 +470,21 @@
       for (Instruction &I : BB) {
         CallSite CS(cast<Value>(&I));
         // If this isn't a call, or it is a call to an intrinsic, it can
-        // never be inlined.
-        if (!CS || isa<IntrinsicInst>(I))
+        // never be inlined. *.invariant.* intrinsics are excepted because
+        // they should be processed when inlining other calls.
+        if (!CS)
           continue;
+        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+          if (II->getIntrinsicID() != Intrinsic::invariant_start &&
+              II->getIntrinsicID() != Intrinsic::invariant_end)
+            continue;
 
         // If this is a direct call to an external function, we can never inline
         // it.  If it is an indirect call, inlining may resolve it to be a
         // direct call, so we keep it.
         if (Function *Callee = CS.getCalledFunction())
-          if (Callee->isDeclaration())
+          if (Callee->isDeclaration() && !isa<IntrinsicInst>(I))
            continue;
 
         CallSites.push_back(std::make_pair(CS, -1));
@@ -514,7 +520,12 @@
     // CallSites may be modified inside so ranged for loop can not be used.
     for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
       CallSite CS = CallSites[CSi].first;
 
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+        if (processInvariantIntrinsics(II))
+          continue;
+      }
+
       Function *Caller = CS.getCaller();
       Function *Callee = CS.getCalledFunction();
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1456,6 +1456,10 @@
     break;
   }
 
+  case Intrinsic::invariant_start:
+  case Intrinsic::invariant_end:
+    processInvariantIntrinsics(II);
+    break;
   case Intrinsic::experimental_gc_relocate: {
     // Translate facts known about a pointer before relocating into
     // facts about the relocate value, while being careful to
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1953,6 +1953,17 @@
           uint64_t DontKnow = CI->isZero() ? -1ULL : 0;
           ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow));
         }
+
+        // If this is a paired *.invariant.start, then erase its *.end.
+        if (II->getIntrinsicID() == Intrinsic::invariant_start &&
+            !I->use_empty()) {
+          IntrinsicInst *User =
+              dyn_cast<IntrinsicInst>(cast<Instruction>(*I->user_begin()));
+          assert(I->hasOneUse() && User &&
+                 User->getIntrinsicID() == Intrinsic::invariant_end &&
+                 "The paired instruction should be an invariant_end.");
+          EraseInstFromFunction(*User);
+        }
       }
       EraseInstFromFunction(*I);
     }
Index: lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- lib/Transforms/Scalar/EarlyCSE.cpp
+++ lib/Transforms/Scalar/EarlyCSE.cpp
@@ -518,6 +518,12 @@
       continue;
     }
 
+    // TODO: Not needed?
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      if (processInvariantIntrinsics(II))
+        continue;
+    }
+
    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -2280,6 +2280,11 @@
   if (isa<DbgInfoIntrinsic>(I))
     return false;
 
+  if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I)) {
+    if (processInvariantIntrinsics(IntrinsicI))
+      return false;
+  }
+
   // If the instruction can be easily simplified then do so now in preference
   // to value numbering it.  Value numbering often exposes redundancies, for
   // example if it determines that %y is equal to %x then the instruction
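
Taken together, the plumbing can be exercised end to end. A toy driver (a
sketch under the assumption that this patch is applied; it is not a unit test
shipped with the change) that builds an alloca, marks it invariant, and checks
that the transient pointer is recorded:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("demo", Ctx);
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
          Function::ExternalLinkage, "f", &M);
      IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

      // %p = alloca i32; store i32 42, i32* %p  -- the one and only write.
      AllocaInst *AI = B.CreateAlloca(B.getInt32Ty());
      B.CreateStore(B.getInt32(42), AI);

      // call {}* @llvm.invariant.start(i64 4, i8* %cast)
      Value *Cast = B.CreateBitCast(AI, B.getInt8PtrTy());
      CallInst *Start = B.CreateCall(
          Intrinsic::getDeclaration(&M, Intrinsic::invariant_start),
          {B.getInt64(4), Cast});

      // Record the marking, as the GVN/EarlyCSE/InstCombine/Inliner hunks do.
      processInvariantIntrinsics(cast<IntrinsicInst>(Start));
      assert(AI->getInvariantStartInstruction() == cast<IntrinsicInst>(Start) &&
             "the alloca should now carry its invariant_start marker");
      B.CreateRetVoid();
      return 0;
    }
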