diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" @@ -120,6 +121,16 @@ Scanned = false; } + /// TODO: Document these outlined-assumption lookup helpers. + Value *getOriginalValue(CallInst &CI, Value &V); + CallInst *getReplacementAssumption(CallInst &CI); + CallInst *getReplacementAssumption(CallInst &CI, Value *&V); + CallInst *getReplacementAssumption(CallInst &CI, + DenseMap &Mapping); + CallInst * + getReplacementAssumption(CallInst &CI, + SmallMapVector &Mapping); + /// Access the list of assumption handles currently tracked for this /// function. /// diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -73,6 +73,7 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); +void initializeAssumptionOutlinerPass(PassRegistry &); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeBDCELegacyPassPass(PassRegistry&); diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" @@ -29,6 +30,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #include #include #include @@ -76,10 +79,13 @@ } }; - Value *Cond = CI->getArgOperand(0), *A, *B; - AddAffected(Cond); + + if (CI->hasOperandBundles()) + for (auto *V : CI->operand_values()) + AddAffected(V); CmpInst::Predicate Pred; + Value *Cond = CI->getArgOperand(0), *A, *B; if (match(Cond, m_ICmp(Pred, m_Value(A), m_Value(B)))) { AddAffected(A); AddAffected(B); @@ -224,6 +230,76 @@ updateAffectedValues(CI); } +template +static CallInst *getReplacementAssumption(CallInst &CI, Value **V, + MappingT *Mapping) { + Optional OpB = CI.getOperandBundle("assume_fn"); + if (!OpB.hasValue()) + return &CI; + + ArrayRef Inputs = OpB.getValue().Inputs; + Function *AssumeFn = cast(Inputs.front().get()); + + CallInst *ReplCI = + cast(AssumeFn->getEntryBlock().back().getPrevNode()); + if (!V && !Mapping) + return ReplCI; + + CI.dump(); + if (V) + (*V)->dump(); + int Idx = -1; + for (unsigned i = 1; i < Inputs.size(); ++i) { + const Use &U = Inputs[i]; + errs() << i << " : " << *U.get() << "\n"; + if (Mapping) + Mapping->insert({AssumeFn->getArg(i - 1), U.get()}); + if (!V || U.get() != *V) + continue; + assert(Idx == -1); + Idx = i - 1; + } + + if (V && Idx >= 0) + *V = AssumeFn->getArg(Idx); + + return ReplCI; +} + +CallInst *AssumptionCache::getReplacementAssumption(CallInst &CI, Value *&V) { + DenseMap *M = nullptr; + return ::getReplacementAssumption(CI, &V, M); +} +CallInst *AssumptionCache::getReplacementAssumption( + CallInst &CI, DenseMap &Mapping) { + return ::getReplacementAssumption(CI, nullptr, &Mapping); +} +CallInst *AssumptionCache::getReplacementAssumption( + CallInst &CI, SmallMapVector &Mapping) { + return ::getReplacementAssumption(CI, nullptr, &Mapping); +} +CallInst *AssumptionCache::getReplacementAssumption(CallInst &CI) { + DenseMap *M = nullptr; + return ::getReplacementAssumption(CI, nullptr, M); +} + +Value 
*AssumptionCache::getOriginalValue(CallInst &CI, Value &V) { + Argument *Arg = dyn_cast(&V); + if (!Arg) + return &V; + + Optional OpB = CI.getOperandBundle("assume_fn"); + if (!OpB.hasValue()) + return &V; + + Function *Fn = Arg->getParent(); + ArrayRef Inputs = OpB.getValue().Inputs; + if (Fn != cast(Inputs.front().get())) + return &V; + + return Inputs[Arg->getArgNo() + 1].get(); +} + AnalysisKey AssumptionAnalysis::Key; PreservedAnalyses AssumptionPrinterPass::run(Function &F, diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -788,6 +788,9 @@ if (!isValidAssumeForContext(I, BBI, DT)) continue; + // Replace the assumption with an outlined one if applicable. + I = AC->getReplacementAssumption(*I, Val); + BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6622,7 +6622,7 @@ BasicBlock *ExitingBlock, ExitCountKind Kind) { switch (Kind) { - case Exact: + case Exact: return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getMax(ExitingBlock, this); @@ -6639,7 +6639,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, ExitCountKind Kind) { switch (Kind) { - case Exact: + case Exact: return getBackedgeTakenInfo(L).getExact(L, this); case ConstantMaximum: return getBackedgeTakenInfo(L).getMax(this); @@ -9547,7 +9547,16 @@ if (!DT.dominates(CI, Latch->getTerminator())) continue; - if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + // Replace the assumption with an outlined one if applicable. 
+ const SCEV *ReplLHS = LHS, *ReplRHS = RHS; + ValueToValueMap RewriteMap; + CallInst *ReplCI = AC.getReplacementAssumption(*CI, RewriteMap); + if (ReplCI != CI) { + ReplLHS = SCEVParameterRewriter::rewrite(LHS, *this, RewriteMap, false); + ReplRHS = SCEVParameterRewriter::rewrite(RHS, *this, RewriteMap, false); + } + + if (isImpliedCond(Pred, ReplLHS, ReplRHS, CI->getArgOperand(0), false)) return true; } @@ -9656,7 +9665,8 @@ }; // Try to prove (Pred, LHS, RHS) using isImpliedCond. - auto ProveViaCond = [&](Value *Condition, bool Inverse) { + auto ProveViaCond = [&](Value *Condition, bool Inverse, const SCEV *LHS, + const SCEV *RHS) { if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) return true; if (ProvingStrictComparison) { @@ -9690,7 +9700,8 @@ continue; if (ProveViaCond(LoopEntryPredicate->getCondition(), - LoopEntryPredicate->getSuccessor(0) != Pair.second)) + LoopEntryPredicate->getSuccessor(0) != Pair.second, LHS, + RHS)) return true; } @@ -9702,7 +9713,16 @@ if (!DT.dominates(CI, L->getHeader())) continue; - if (ProveViaCond(CI->getArgOperand(0), false)) + // Replace the assumption with an outlined one if applicable. + const SCEV *ReplLHS = LHS, *ReplRHS = RHS; + ValueToValueMap RewriteMap; + CallInst *ReplCI = AC.getReplacementAssumption(*CI, RewriteMap); + if (ReplCI != CI) { + ReplLHS = SCEVParameterRewriter::rewrite(LHS, *this, RewriteMap, false); + ReplRHS = SCEVParameterRewriter::rewrite(RHS, *this, RewriteMap, false); + } + + if (ProveViaCond(CI->getArgOperand(0), false, ReplLHS, ReplRHS)) return true; } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -598,6 +598,14 @@ if (Q.isExcluded(I)) continue; + // Replace the assumption with an outlined one if applicable. 
+ Value *VV = const_cast(V); + CallInst *ReplI = Q.AC->getReplacementAssumption(*I, VV); + if (ReplI != I) { + I = ReplI; + V = VV; + } + // Warning: This loop can end up being somewhat performance sensitive. // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -137,6 +137,7 @@ initializeJumpThreadingPass(R); initializeSROALegacyPassPass(R); initializeAttributorLegacyPassPass(R); + initializeAssumptionOutlinerPass(R); initializePostOrderFunctionAttrsLegacyPassPass(R); initializeReversePostOrderFunctionAttrsLegacyPassPass(R); initializeGlobalsAAWrapperPassPass(R); diff --git a/llvm/lib/Transforms/IPO/AssumptionOutliner.cpp b/llvm/lib/Transforms/IPO/AssumptionOutliner.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AssumptionOutliner.cpp @@ -0,0 +1,243 @@ +//===- AssumptionOutliner.cpp - Extract assumptions into own functions ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" + +using namespace llvm; + +static cl::opt DuplicateInst("assumption-outliner-duplicate-inst", + cl::Hidden, cl::init(false)); +static cl::opt Aggressive("assumption-outliner-aggressive", cl::Hidden, + cl::init(true)); + +static void markAsOutlined(Instruction &OutlinedI, + SmallVectorImpl &OutlinedInstsOrdered, + DenseMap &NonOutlinedUsesMap) { + assert(NonOutlinedUsesMap[&OutlinedI] == 0); + SmallVector Worklist; + Worklist.push_back(&OutlinedI); + + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (isa(I)) + continue; + OutlinedInstsOrdered.push_back(I); + for (Value *Op : I->operands()) + if (auto *OpI = dyn_cast(Op)) { + int &NonOutlinedUses = NonOutlinedUsesMap[OpI]; + if (NonOutlinedUses == 0) + NonOutlinedUses += OpI->getNumUses(); + NonOutlinedUses -= 1; + if (NonOutlinedUses == 0) + Worklist.push_back(OpI); + } + } +} +static Instruction *duplicateInsts(IntrinsicInst &AssumeCall, + Instruction *OutlinedI) { + 
SmallPtrSet Visisted; + Visisted.insert(&AssumeCall); + SmallVector Worklist; + Worklist.push_back(OutlinedI); + + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (isa(I) || I->isTerminator() || isa(I)) + continue; + if (!Visisted.insert(I).second) + continue; + for (Value *Op : I->operands()) + if (auto *OpI = dyn_cast(Op)) + Worklist.push_back(OpI); + } + + SmallPtrSet Cloned; + SmallPtrSet Clones; + for (Instruction *I : Visisted) { + Worklist.clear(); + Instruction *Clone = nullptr; + for (User *Usr : I->users()) + if (Instruction *UserI = dyn_cast(Usr)) { + if (!Cloned.count(UserI) && Visisted.count(UserI)) + Worklist.push_back(UserI); + else if (!Clone) + Clone = I->clone(); + if (Clones.count(UserI)) + Worklist.push_back(UserI); + } + if (!Clone) + continue; + Clones.insert(Clone); + Cloned.insert(I); + Clone->insertAfter(I); + Clone->setName(I->getName() + ".clone"); + if (OutlinedI == I) + OutlinedI = Clone; + while (!Worklist.empty()) + Worklist.pop_back_val()->replaceUsesOfWith(I, Clone); + } + return OutlinedI; +} + +static bool outline(IntrinsicInst &AssumeCall, AssumptionCache &AC, + AlignmentFromAssumptionsPass &AFAP) { + Instruction *Op = dyn_cast(AssumeCall.getOperand(0)); + if (!Op || (!DuplicateInst && Op->getNumUses() > 1) || + (isa(Op) && + cast(Op)->getIntrinsicID() == Intrinsic::type_test)) + return false; + + SmallVector, 4> OpBundleOps; + SmallVector Operands; + + Value *AAPtr; + const SCEV *AlignSCEV, *OffSCEV; + if (AFAP.extractAlignmentInfo(&AssumeCall, AAPtr, AlignSCEV, OffSCEV)) { + Operands.push_back(AAPtr); + SCEVExpander Expander(*AFAP.SE, AssumeCall.getModule()->getDataLayout(), + ""); + Expander.setInsertPoint(&AssumeCall); + Operands.push_back(Expander.expandCodeFor(AlignSCEV)); + Operands.push_back(Expander.expandCodeFor(OffSCEV)); + OperandBundleDefT AlignOpB("align", Operands); + OpBundleOps.push_back(AlignOpB); + } else if (!Aggressive) + return false; + + AC.unregisterAssumption(&AssumeCall); + + 
// Find the instructions we want to outline and an order in which we can put + // them later. + SmallVector OutlinedInstsOrdered; + DenseMap NonOutlinedUsesMap; + if (DuplicateInst) + Op = duplicateInsts(AssumeCall, Op); + markAsOutlined(*Op, OutlinedInstsOrdered, NonOutlinedUsesMap); + assert(!OutlinedInstsOrdered.empty()); + + // Isolate the call into its own basic block. + SplitBlock(AssumeCall.getParent(), &AssumeCall); + SplitBlock(AssumeCall.getParent(), AssumeCall.getNextNode()); + + // Move the instruction into the new block and order them. + while (!OutlinedInstsOrdered.empty()) { + Instruction *I = OutlinedInstsOrdered.pop_back_val(); + I->moveBefore(&AssumeCall); + } + + CodeExtractorAnalysisCache CEAC(*AssumeCall.getFunction()); + CodeExtractor CE(ArrayRef({AssumeCall.getParent()})); + + Function *OutlinedFn = CE.extractCodeRegion(CEAC); + + assert(OutlinedFn->getNumUses() == 1); + CallInst *DirectCall = cast(OutlinedFn->user_back()); + OutlinedFn->setName("__assumption_in_" + DirectCall->getCaller()->getName()); + + Operands.clear(); + Operands.push_back(OutlinedFn); + Operands.append(DirectCall->value_op_begin(), --DirectCall->value_op_end()); + OperandBundleDefT AssumeFnOpB("assume_fn", Operands); + OpBundleOps.push_back(AssumeFnOpB); + + LLVMContext &Ctx = OutlinedFn->getContext(); + CallInst *NewAssumeCall = CallInst::Create(AssumeCall.getCalledFunction(), + {ConstantInt::getTrue(Ctx)}, + OpBundleOps, "", DirectCall); + + unsigned Merged = 0; + Merged += MergeBlockIntoPredecessor( + NewAssumeCall->getParent()->getUniqueSuccessor()); + Merged += MergeBlockIntoPredecessor(NewAssumeCall->getParent()); + Merged += + MergeBlockIntoPredecessor(AssumeCall.getParent()->getUniqueSuccessor()); + Merged += MergeBlockIntoPredecessor(AssumeCall.getParent()); + assert(Merged == 4); + + // Remove the direct call. 
+ DirectCall->eraseFromParent(); + assert(OutlinedFn->getNumUses() == 1); + + AC.registerAssumption(NewAssumeCall); + assert(NewAssumeCall->hasOperandBundles()); + + return true; +} + +namespace { +struct AssumptionOutliner : public ModulePass { + AssumptionOutliner() : ModulePass(ID) {} + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredTransitive(); + AU.addRequired(); + } + static char ID; +}; +} // namespace + +bool AssumptionOutliner::runOnModule(Module &M) { + Function *Assume = M.getFunction("llvm.assume"); + if (!Assume) + return false; + SmallVector Uses; + for (Use &U : Assume->uses()) + if (auto *II = dyn_cast(U.getUser())) + Uses.push_back(II); + + bool Changed = false; + for (IntrinsicInst *II : Uses) { + Function *Fn = II->getFunction(); + auto &ACU = getAnalysis(); + ScalarEvolution *SE = &getAnalysis(*Fn).getSE(); + AlignmentFromAssumptionsPass AFAP; + AFAP.SE = SE; + + auto &AC = ACU.getAssumptionCache(*Fn); + Changed |= outline(*II, AC, AFAP); + } + + assert(!verifyModule(M, &errs())); + return Changed; +} + +char AssumptionOutliner::ID = 0; + +namespace llvm { +ModulePass *createAssumptionOutlinerPass() { return new AssumptionOutliner(); } +} // namespace llvm + +INITIALIZE_PASS_BEGIN(AssumptionOutliner, "assumption-outliner", + "Assumption outliner", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(AssumptionOutliner, "assumption-outliner", + "Assumption outliner", false, false) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_component_library(LLVMipo AlwaysInliner.cpp ArgumentPromotion.cpp + AssumptionOutliner.cpp Attributor.cpp BarrierNoopPass.cpp BlockExtractor.cpp diff --git 
a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -46,6 +46,7 @@ initializeMergeFunctionsPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); + initializeAssumptionOutlinerPass(Registry); initializePostOrderFunctionAttrsLegacyPassPass(Registry); initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry); initializePruneEHPass(Registry); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -47,6 +47,10 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +namespace llvm { +ModulePass *createAssumptionOutlinerPass(); +} + using namespace llvm; static cl::opt @@ -442,6 +446,8 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + MPM.add(createAssumptionOutlinerPass()); + // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link // is handled separately, so just check this is not the ThinLTO post-link. 
bool DefaultOrPreLinkPipeline = !PerformThinLTO; diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -199,6 +199,19 @@ Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV) { + Type *Int64Ty = Type::getInt64Ty(I->getContext()); + Optional AlignOB = I->getOperandBundle("align"); + if (AlignOB.hasValue()) { + assert(AlignOB.getValue().Inputs.size() >= 2); + AAPtr = AlignOB.getValue().Inputs[0].get(); + AlignSCEV = SE->getSCEV(AlignOB.getValue().Inputs[1].get()); + if (AlignOB.getValue().Inputs.size() == 3) + OffSCEV = SE->getSCEV(AlignOB.getValue().Inputs[2].get()); + else + OffSCEV = SE->getZero(Int64Ty); + return true; + } + // An alignment assume must be a statement about the least-significant // bits of the pointer being zero, possibly with some offset. ICmpInst *ICI = dyn_cast(I->getArgOperand(0)); @@ -251,7 +264,6 @@ unsigned(sizeof(unsigned) * CHAR_BIT - 1)); Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment); - Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext()); AlignSCEV = SE->getConstant(Int64Ty, Alignment); // The LHS might be a ptrtoint instruction, or it might be the pointer @@ -265,7 +277,8 @@ dyn_cast(AndLHSSCEV)) { // Try to find the ptrtoint; subtract it and the rest is the offset. 
for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), - JE = AndLHSAddSCEV->op_end(); J != JE; ++J) + JE = AndLHSAddSCEV->op_end(); + J != JE; ++J) if (const SCEVUnknown *OpUnk = dyn_cast(*J)) if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) { AAPtr = PToI->getPointerOperand(); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1400,7 +1400,9 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && "This function can only be called with llvm.assume intrinsic"); - Value *V = IntrinsicI->getArgOperand(0); + auto *ReplIntrinsicI = cast( + AC->getReplacementAssumption(*IntrinsicI, ReplaceOperandsWithMap)); + Value *V = ReplIntrinsicI->getArgOperand(0); if (ConstantInt *Cond = dyn_cast(V)) { if (Cond->isZero()) { @@ -1412,7 +1414,8 @@ Constant::getNullValue(Int8Ty->getPointerTo()), IntrinsicI); } - markInstructionForDeletion(IntrinsicI); + if (!IntrinsicI->hasOperandBundles()) + markInstructionForDeletion(IntrinsicI); return false; } else if (isa(V)) { // If it's not false, and constant, it must evaluate to true. 
This means our @@ -1489,11 +1492,12 @@ (!isa(CmpRHS) || cast(CmpRHS)->isZero())) return Changed; - LLVM_DEBUG(dbgs() << "Replacing dominated uses of " - << *CmpLHS << " with " - << *CmpRHS << " in block " - << IntrinsicI->getParent()->getName() << "\n"); - + if (ReplIntrinsicI != IntrinsicI && ReplaceOperandsWithMap.lookup(CmpLHS)) + CmpLHS = ReplaceOperandsWithMap[CmpLHS]; + + LLVM_DEBUG(dbgs() << "Replacing dominated uses of " << *CmpLHS << " with " + << *CmpRHS << " in block " + << IntrinsicI->getParent()->getName() << "\n"); // Setup the replacement map - this handles uses within the same block if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) @@ -1788,6 +1792,11 @@ std::pair Item = Worklist.pop_back_val(); LHS = Item.first; RHS = Item.second; + if (Value *ReplLHS = ReplaceOperandsWithMap.lookup(LHS)) + LHS = ReplLHS; + if (Value *ReplRHS = ReplaceOperandsWithMap.lookup(RHS)) + RHS = ReplRHS; + if (LHS == RHS) continue; assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1063,7 +1063,7 @@ ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; + std::vector params, StructValues, ReloadOutputs; Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); @@ -1163,15 +1163,14 @@ } else { Output = ReloadOutputs[i]; } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload"); - Reloads.push_back(load); - codeReplacer->getInstList().push_back(load); std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast(Users[u]); - if 
(!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); + if (Blocks.count(inst->getParent())) + continue; + LoadInst *load = new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", inst); + inst->replaceUsesOfWith(outputs[i], load); } } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -413,7 +413,7 @@ if (II->getIntrinsicID() == Intrinsic::assume || II->getIntrinsicID() == Intrinsic::experimental_guard) { if (ConstantInt *Cond = dyn_cast(II->getArgOperand(0))) - return !Cond->isZero(); + return !Cond->isZero() && !II->hasOperandBundles(); return false; } diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll --- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll @@ -1,5 +1,7 @@ target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" ; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -assumption-outliner-aggressive=false -alignment-from-assumptions -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -assumption-outliner-aggressive=true -alignment-from-assumptions -S | FileCheck %s ; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll --- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll @@ -1,5 +1,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" ; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -assumption-outliner-aggressive=false -alignment-from-assumptions 
-S | FileCheck %s +; RUN: opt < %s -assumption-outliner -assumption-outliner-aggressive=true -alignment-from-assumptions -S | FileCheck %s ; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -gvn -S | FileCheck %s +; RUN: opt < %s -gvn -S | FileCheck %s --check-prefixes=CHECK,INLINED +; RUN: opt < %s -assumption-outliner -gvn -S | FileCheck %s %struct.A = type { i32 (...)** } @_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8 @@ -134,7 +135,7 @@ %cmp = fcmp oeq float %1, %0 ; note const on lhs call void @llvm.assume(i1 %cmp) - ; CHECK: ret float 3.000000e+00 + ; INLINED: ret float 3.000000e+00 ret float %0 } @@ -196,7 +197,7 @@ bb2: ; CHECK-NOT: %cmp2 = %cmp2 = icmp eq i32 %p, 42 - ; CHECK-NOT: call void @llvm.assume( + ; INLINED-NOT: call void @llvm.assume( call void @llvm.assume(i1 %cmp2) ; CHECK: br i1 true, label %bb2, label %bb2 @@ -217,7 +218,7 @@ bb2: ; CHECK-NOT: %cmp3 = %cmp3 = icmp eq i32 %p, 43 - ; CHECK: store i8 undef, i8* null + ; INLINED: store i8 undef, i8* null call void @llvm.assume(i1 %cmp3) ret i32 15 bb3: diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git 
a/llvm/test/Transforms/InstSimplify/assume.ll b/llvm/test/Transforms/InstSimplify/assume.ll --- a/llvm/test/Transforms/InstSimplify/assume.ll +++ b/llvm/test/Transforms/InstSimplify/assume.ll @@ -1,11 +1,9 @@ ; NOTE: Assertions have been autogenerated by update_test_checks.py ; RUN: opt -instsimplify -S < %s 2>&1 -pass-remarks-analysis=.* | FileCheck %s +; RUN: opt -assumption-outliner -instsimplify -S < %s 2>&1 -pass-remarks-analysis=.* | FileCheck %s ; Verify that warnings are emitted for the 2nd and 3rd tests. -; CHECK: remark: /tmp/s.c:1:13: Detected conflicting code assumptions. -; CHECK: remark: /tmp/s.c:4:10: Detected conflicting code assumptions. -; CHECK: remark: /tmp/s.c:5:50: Detected conflicting code assumptions. define void @test1() { ; CHECK-LABEL: @test1( @@ -23,7 +21,7 @@ define i64 @PR31809() !dbg !7 { ; CHECK-LABEL: @PR31809( -; CHECK-NEXT: ret i64 3 +; CHECK: ret i64 ; %a = alloca i32 %t1 = ptrtoint i32* %a to i64, !dbg !9 @@ -37,10 +35,9 @@ define i8 @conflicting_assumptions(i8 %x) !dbg !10 { ; CHECK-LABEL: @conflicting_assumptions( -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 1, !dbg !10 -; CHECK-NEXT: call void @llvm.assume(i1 false) -; CHECK-NEXT: [[COND2:%.*]] = icmp eq i8 [[X]], 4 -; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]]) +; CHECK: [[ADD:%.*]] = add i8 [[X:%.*]], 1, !dbg +; CHECK-NEXT: call void @llvm.assume( +; CHECK: call void @llvm.assume( ; CHECK-NEXT: ret i8 [[ADD]] ; %add = add i8 %x, 1, !dbg !11 @@ -56,8 +53,8 @@ define void @PR36270(i32 %b) !dbg !13 { ; CHECK-LABEL: @PR36270( -; CHECK-NEXT: tail call void @llvm.assume(i1 false) -; CHECK-NEXT: unreachable +; CHECK: call void @llvm.assume( +; CHECK: unreachable ; %B7 = xor i32 -1, 2147483647 %and1 = and i32 %b, 3 diff --git a/llvm/test/Transforms/JumpThreading/assume-edge-dom.ll b/llvm/test/Transforms/JumpThreading/assume-edge-dom.ll --- a/llvm/test/Transforms/JumpThreading/assume-edge-dom.ll +++ b/llvm/test/Transforms/JumpThreading/assume-edge-dom.ll @@ -1,4 +1,5 @@ ; 
RUN: opt -S -jump-threading < %s | FileCheck %s +; RUN: opt -S -assumption-outliner -jump-threading < %s | FileCheck %s declare i8* @escape() declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/JumpThreading/assume.ll b/llvm/test/Transforms/JumpThreading/assume.ll --- a/llvm/test/Transforms/JumpThreading/assume.ll +++ b/llvm/test/Transforms/JumpThreading/assume.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -jump-threading -dce < %s | FileCheck %s +; RUN: opt -S -assumption-outliner -jump-threading -dce < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -11,7 +12,6 @@ br i1 %cmp1, label %if.then, label %if.else ; CHECK-LABEL: @test1 -; CHECK: icmp sgt i32 %a, 5 ; CHECK: call void @llvm.assume ; CHECK-NOT: icmp sgt i32 %a, 3 ; CHECK: ret i32 @@ -41,8 +41,7 @@ br i1 %cmp1, label %if.then, label %return ; CHECK-LABEL: @test2 -; CHECK: icmp sgt i32 %a, 5 -; CHECK: tail call void @llvm.assume +; CHECK: call void @llvm.assume ; CHECK: tail call void (...) @bar() ; CHECK: ret i32 1 @@ -62,7 +61,6 @@ ; We can fold the assume based on the semantics of assume. 
define void @can_fold_assume(i32* %array) { ; CHECK-LABEL: @can_fold_assume -; CHECK-NOT: call void @llvm.assume ; CHECK-NOT: br ; CHECK: ret void %notnull = icmp ne i32* %array, null @@ -84,8 +82,9 @@ ; CHECK-LABEL:@cannot_fold_use_before_assume ; CHECK: @f(i1 %notnull) ; CHECK-NEXT: exit() -; CHECK-NOT: assume -; CHECK-NEXT: ret void +; CHECK-NOT: notnull +; CHECK: ret void +; CHECK-NOT: notnull %notnull = icmp ne i32* %array, null call void @f(i1 %notnull) call void @exit() @@ -106,8 +105,9 @@ ; CHECK-LABEL:@can_fold_some_use_before_assume ; CHECK: @f(i1 %notnull) ; CHECK-NEXT: @dummy(i1 true) -; CHECK-NOT: assume -; CHECK-NEXT: ret void +; CHECK-NOT: notnull +; CHECK: ret void +; CHECK-NOT: notnull %notnull = icmp ne i32* %array, null call void @f(i1 %notnull) call void @dummy(i1 %notnull) @@ -128,7 +128,7 @@ define void @can_fold_assume_and_all_uses(i32* %array) { ; CHECK-LABEL:@can_fold_assume_and_all_uses ; CHECK: @dummy(i1 %notnull) -; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: assume ; CHECK-NEXT: exit() ; CHECK-NEXT: %notnull2 = or i1 true, false ; CHECK-NEXT: @f(i1 %notnull2) @@ -156,7 +156,7 @@ ; CHECK-LABEL:@can_fold_assume2 ; CHECK: @f(i1 %notnull) -; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: assume ; CHECK-NEXT: znotnull = zext i1 %notnull to i8 ; CHECK-NEXT: @f(i1 %notnull) ; CHECK-NEXT: @f(i1 true) @@ -186,7 +186,7 @@ ; CHECK-LABEL:@can_fold_assume3 ; CHECK: @f(i1 %notnull) -; CHECK-NEXT: assume(i1 %notnull) +; CHECK-NEXT: assume ; CHECK-NEXT: guard(i1 %notnull) ; CHECK-NEXT: znotnull = zext i1 true to i8 ; CHECK-NEXT: @f(i1 true) @@ -214,8 +214,11 @@ define void @can_fold_assume4(i32* %array) { ; CHECK-LABEL: can_fold_assume4 ; CHECK-NOT: notnull +; CHECK: exit +; CHECK-NOT: notnull ; CHECK: dummy(i1 true) -; CHECK-NEXT: ret void +; CHECK-NOT: notnull +; CHECK: ret void %notnull = icmp ne i32* %array, null call void @exit() call void @dummy(i1 %notnull) diff --git a/llvm/test/Transforms/JumpThreading/pr33917.ll 
b/llvm/test/Transforms/JumpThreading/pr33917.ll --- a/llvm/test/Transforms/JumpThreading/pr33917.ll +++ b/llvm/test/Transforms/JumpThreading/pr33917.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -jump-threading -correlated-propagation %s -S | FileCheck %s +; RUN: opt -assumption-outliner -jump-threading -correlated-propagation %s -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/JumpThreading/pr36133.ll b/llvm/test/Transforms/JumpThreading/pr36133.ll --- a/llvm/test/Transforms/JumpThreading/pr36133.ll +++ b/llvm/test/Transforms/JumpThreading/pr36133.ll @@ -1,4 +1,5 @@ ; RUN: opt -jump-threading -S < %s | FileCheck %s +; RUN: opt -assumption-outliner -jump-threading -S < %s | FileCheck %s @global = external global i8*, align 8 define i32 @foo(i32 %arg) { diff --git a/llvm/test/Transforms/LoopVectorize/X86/assume.ll b/llvm/test/Transforms/LoopVectorize/X86/assume.ll --- a/llvm/test/Transforms/LoopVectorize/X86/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/assume.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/NewGVN/assumes.ll b/llvm/test/Transforms/NewGVN/assumes.ll --- a/llvm/test/Transforms/NewGVN/assumes.ll +++ b/llvm/test/Transforms/NewGVN/assumes.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -newgvn -S | FileCheck %s +; RUN: opt < %s -assumption-outliner -newgvn -S | FileCheck %s ; CHECK-LABEL: @test1 ; CHECK: ret i32 %arg diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll --- 
a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -1,9 +1,12 @@ ; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: opt -S -assumption-outliner -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s ; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: opt -assumption-outliner -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: opt --assumption-outliner -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -assumption-outliner -wholeprogramdevirt %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu"