Index: docs/LangRef.html
===================================================================
--- docs/LangRef.html
+++ docs/LangRef.html
@@ -330,6 +330,8 @@
   'llvm.objectsize' Intrinsic
+  'llvm.invariant' Intrinsic
+
   'llvm.expect' Intrinsic
   'llvm.donothing' Intrinsic
@@ -9057,6 +9059,34 @@
 
+'llvm.invariant' Intrinsic
+
+Syntax:
+
+  declare void @llvm.invariant(i1 %cond)
+
+Overview:
+
+The 'llvm.invariant' intrinsic allows the optimizer to assume that the
+provided condition is true. This information can then be used to simplify
+other parts of the code.
+
+Arguments:
+
+The argument is the condition that the optimizer may assume is always
+true.
+
+Semantics:
+
+The optimizer may assume that the provided condition is always true. No
+code is generated for this intrinsic, and instructions that contribute
+only to computing the provided condition are likewise not used for code
+generation.
+

    'llvm.donothing' Intrinsic

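To make the semantics concrete, here is a minimal IR sketch (function and
value names are illustrative; the pattern mirrors the tests added below) in
which the invariant encodes the assumption that %a is 32-byte aligned, which
the new alignment propagation pass can then use to raise the alignment on
the load:

  define i32 @f(i32* %a) {
  entry:
    ; Assert that the low five bits of the pointer value are zero, i.e.,
    ; that %a is 32-byte aligned.
    %ptrint = ptrtoint i32* %a to i64
    %maskedptr = and i64 %ptrint, 31
    %maskcond = icmp eq i64 %maskedptr, 0
    tail call void @llvm.invariant(i1 %maskcond)
    ; Given the invariant, the alignment here can be raised from 4 to 32.
    %v = load i32* %a, align 4
    ret i32 %v
  }

  declare void @llvm.invariant(i1) nounwind readnone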
Index: include/llvm-c/Transforms/Scalar.h
===================================================================
--- include/llvm-c/Transforms/Scalar.h
+++ include/llvm-c/Transforms/Scalar.h
@@ -35,6 +35,9 @@
 /** See llvm::createAggressiveDCEPass function. */
 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
 
+/** See llvm::createAlignmentInvPropPass function. */
+void LLVMAddAlignmentInvPropPass(LLVMPassManagerRef PM);
+
 /** See llvm::createCFGSimplificationPass function. */
 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
 
Index: include/llvm/Analysis/EphemeralValues.h
===================================================================
--- /dev/null
+++ include/llvm/Analysis/EphemeralValues.h
@@ -0,0 +1,56 @@
+//===- EphemeralValues.h - Ephemeral value analysis -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Calculate ephemeral values - those used only (indirectly) by invariants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EPHEMERAL_VALUES_H
+#define LLVM_ANALYSIS_EPHEMERAL_VALUES_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class Value;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// @brief Analysis that finds ephemeral values.
+class EphemeralValues : public ModulePass {
+  DenseSet<Value *> EphValues;
+
+  EphemeralValues(const EphemeralValues &) LLVM_DELETED_FUNCTION;
+  const EphemeralValues &operator=(const EphemeralValues &) LLVM_DELETED_FUNCTION;
+
+public:
+  // Returns true if the provided value is ephemeral.
+  bool isEphemeralValue(Value *V) const {
+    return EphValues.count(V);
+  }
+  bool isEphemeralValue(const Value *V) const {
+    return isEphemeralValue(const_cast<Value *>(V));
+  }
+
+public:
+  static char ID;
+  explicit EphemeralValues();
+
+  /// @name ModulePass interface
+  //@{
+  virtual bool runOnModule(Module &M);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual void print(raw_ostream &OS, const Module *M) const;
+  //@}
+};
+
+} // End llvm namespace
+#endif
+
Index: include/llvm/Analysis/InlineCost.h
===================================================================
--- include/llvm/Analysis/InlineCost.h
+++ include/llvm/Analysis/InlineCost.h
@@ -27,6 +27,7 @@
 
   class CallSite;
   class DataLayout;
+  class EphemeralValues;
 
   namespace InlineConstants {
     // Various magic constants used to adjust heuristics.
@@ -106,11 +107,14 @@
   class InlineCostAnalyzer {
     // DataLayout if available, or null.
     const DataLayout *TD;
+    // EphemeralValues if available, or null.
+    const EphemeralValues *EV;
 
   public:
-    InlineCostAnalyzer(): TD(0) {}
+    InlineCostAnalyzer(): TD(0), EV(0) {}
 
     void setDataLayout(const DataLayout *TData) { TD = TData; }
+    void setEphemeralValues(const EphemeralValues *EVals) { EV = EVals; }
 
     /// \brief Get an InlineCost object representing the cost of inlining this
     /// callsite.
Index: include/llvm/Analysis/Passes.h
===================================================================
--- include/llvm/Analysis/Passes.h
+++ include/llvm/Analysis/Passes.h
@@ -66,6 +66,12 @@
   //===--------------------------------------------------------------------===//
   //
+  // createEphemeralValuesPass - This pass identifies ephemeral values.
+  //
+  ModulePass *createEphemeralValuesPass();
+
+  //===--------------------------------------------------------------------===//
+  //
   /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows
   /// about the semantics of a set of libcalls specified by LCI. The newly
   /// constructed pass takes ownership of the pointer that is provided.
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -66,6 +66,7 @@
 void initializeAliasSetPrinterPass(PassRegistry&);
 void initializeAlwaysInlinerPass(PassRegistry&);
 void initializeArgPromotionPass(PassRegistry&);
+void initializeAlignmentInvPropPass(PassRegistry&);
 void initializeBarrierNoopPass(PassRegistry&);
 void initializeBasicAliasAnalysisPass(PassRegistry&);
 void initializeBasicCallGraphPass(PassRegistry&);
@@ -107,6 +108,7 @@
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEdgeProfilerPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
+void initializeEphemeralValuesPass(PassRegistry&);
 void initializePathProfilerPass(PassRegistry&);
 void initializeGCOVProfilerPass(PassRegistry&);
 void initializeAddressSanitizerPass(PassRegistry&);
Index: include/llvm/Intrinsics.td
===================================================================
--- include/llvm/Intrinsics.td
+++ include/llvm/Intrinsics.td
@@ -234,6 +234,8 @@
 def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
 
+def int_invariant : Intrinsic<[], [llvm_i1_ty], [IntrNoMem]>;
+
 // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
 // guard to the correct place on the stack frame.
 def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -50,6 +50,7 @@
       (void) llvm::createAliasAnalysisCounterPass();
       (void) llvm::createAliasDebugger();
       (void) llvm::createArgumentPromotionPass();
+      (void) llvm::createAlignmentInvPropPass();
       (void) llvm::createBasicAliasAnalysisPass();
       (void) llvm::createLibCallAliasAnalysisPass(0);
       (void) llvm::createScalarEvolutionAliasAnalysisPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -32,6 +32,12 @@
 //===----------------------------------------------------------------------===//
 //
+// AlignmentInvProp - A worklist-driven alignment assumption propagation pass.
+//
+FunctionPass *createAlignmentInvPropPass();
+
+//===----------------------------------------------------------------------===//
+//
 // SCCP - Sparse conditional constant propagation.
 //
 FunctionPass *createSCCPPass();
Index: lib/Analysis/Analysis.cpp
===================================================================
--- lib/Analysis/Analysis.cpp
+++ lib/Analysis/Analysis.cpp
@@ -42,6 +42,7 @@
   initializePostDomPrinterPass(Registry);
   initializePostDomOnlyViewerPass(Registry);
   initializePostDomOnlyPrinterPass(Registry);
+  initializeEphemeralValuesPass(Registry);
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
Index: lib/Analysis/CMakeLists.txt
===================================================================
--- lib/Analysis/CMakeLists.txt
+++ lib/Analysis/CMakeLists.txt
@@ -17,6 +17,7 @@
   DependenceAnalysis.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
+  EphemeralValues.cpp
   IVUsers.cpp
   InlineCost.cpp
   InstCount.cpp
Index: lib/Analysis/CodeMetrics.cpp
===================================================================
--- lib/Analysis/CodeMetrics.cpp
+++ lib/Analysis/CodeMetrics.cpp
@@ -69,6 +69,7 @@
         return false;
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
+      case Intrinsic::invariant:
       case Intrinsic::invariant_start:
       case Intrinsic::invariant_end:
       case Intrinsic::lifetime_start:
Index: lib/Analysis/EphemeralValues.cpp
===================================================================
--- /dev/null
+++ lib/Analysis/EphemeralValues.cpp
@@ -0,0 +1,114 @@
+//===------------------------ EphemeralValues.cpp ------------------------===//
+// Code to determine ephemeral values
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ephemeral value determination.
+//
+//===----------------------------------------------------------------------===//
+
+#define EV_NAME "eph-values"
+#define DEBUG_TYPE EV_NAME
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/EphemeralValues.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Constant.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Instruction.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char EphemeralValues::ID = 0;
+static const char ev_name[] = "Ephemeral value analysis";
+INITIALIZE_PASS_BEGIN(EphemeralValues, EV_NAME,
+                ev_name, false, false)
+INITIALIZE_PASS_END(EphemeralValues, EV_NAME,
+                ev_name, false, false)
+
+ModulePass *llvm::createEphemeralValuesPass() {
+  return new EphemeralValues();
+}
+
+EphemeralValues::EphemeralValues() : ModulePass(ID) {
+  initializeEphemeralValuesPass(*PassRegistry::getPassRegistry());
+}
+
+void EphemeralValues::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+void EphemeralValues::print(raw_ostream &OS, const Module *M) const {
+  if (!M)
+    return;
+
+  OS << "Ephemeral values...\n";
+  for (Module::const_iterator L = M->begin(), LE = M->end(); L != LE; ++L)
+    for (Function::const_iterator I = L->begin(), IE = L->end(); I != IE; ++I)
+      for (BasicBlock::const_iterator J = I->getFirstInsertionPt(),
+           JE = I->end(); J != JE; ++J)
+        if (isEphemeralValue(J)) {
+          OS << "\tephemeral: " << L->getName() << ": " << I->getName() <<
+                ": " << *J << "\n";
+        }
+}
+
+bool EphemeralValues::runOnModule(Module &M) {
+  DenseSet<Value *> Visited;
+  SmallVector<Value *, 16> WorkSet;
+
+  EphValues.clear();
+
+  for (Module::iterator L = M.begin(), LE = M.end(); L != LE; ++L)
+    for (Function::iterator I = L->begin(), IE = L->end(); I != IE; ++I)
+      for (BasicBlock::iterator J = I->getFirstInsertionPt(), JE = I->end();
+           J != JE; ++J)
+        if (CallInst *CI = dyn_cast<CallInst>(J))
+          if (Function *F2 = CI->getCalledFunction())
+            if (F2->getIntrinsicID() == Intrinsic::invariant) {
+              WorkSet.push_back(CI);
+              EphValues.insert(CI);
+            }
+
+  while (!WorkSet.empty()) {
+    Value *V = WorkSet.pop_back_val();
+    if (!Visited.insert(V).second)
+      continue;
+
+    // If all uses of this value are ephemeral, then so is this value.
+    bool FoundNEUse = false;
+    for (Value::use_iterator I = V->use_begin(), IE = V->use_end();
+         I != IE; ++I)
+      if (!EphValues.count(*I)) {
+        FoundNEUse = true;
+        break;
+      }
+
+    if (!FoundNEUse) {
+      EphValues.insert(V);
+
+      if (User *U = dyn_cast<User>(V))
+        for (User::op_iterator J = U->op_begin(), JE = U->op_end();
+             J != JE; ++J)
+          WorkSet.push_back(*J);
+    }
+  }
+
+  return false;
+}
+
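To make "ephemeral" concrete, consider this hypothetical IR fragment (names
are illustrative). The icmp and the call are ephemeral because the
condition's only transitive use is the invariant itself; %x is not
ephemeral, because it is also returned and so has a use outside the
invariant:

  %x = add i32 %a, %b
  %c = icmp sgt i32 %x, 0                 ; ephemeral: only feeds the call
  tail call void @llvm.invariant(i1 %c)   ; ephemeral: the invariant itself
  ret i32 %x                              ; real use: %x stays non-ephemeral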
Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline-cost"
+#include "llvm/Analysis/EphemeralValues.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -43,6 +44,8 @@
   // DataLayout if available, or null.
   const DataLayout *const TD;
+  // EphemeralValues if available, or null.
+  const EphemeralValues *const EV;
 
   // The called function.
   Function &F;
@@ -125,8 +128,9 @@
   bool visitCallSite(CallSite CS);
 
 public:
-  CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
-    : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+  CallAnalyzer(const DataLayout *TD, const EphemeralValues *EV,
+               Function &Callee, int Threshold)
+    : TD(TD), EV(EV), F(Callee), Threshold(Threshold), Cost(0),
       IsCallerRecursive(false), IsRecursiveCall(false),
       ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
       NumInstructions(0), NumVectorInstructions(0),
@@ -670,7 +674,7 @@
   // during devirtualization and so we want to give it a hefty bonus for
   // inlining, but cap that bonus in the event that inlining wouldn't pan
   // out. Pretend to inline the function, with a custom threshold.
-  CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+  CallAnalyzer CA(TD, EV, *F, InlineConstants::IndirectCallThreshold);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // bonus we want to apply, but don't go below zero.
@@ -714,7 +718,7 @@
   // all of the per-instruction logic. The visit tree returns true if we
   // consumed the instruction in any way, and false if the instruction's base
   // cost should count against inlining.
-  if (Base::visit(I))
+  if ((EV && EV->isEphemeralValue(I)) || Base::visit(I))
     ++NumInstructionsSimplified;
   else
     Cost += InlineConstants::InstrCost;
@@ -1058,7 +1062,7 @@
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
         << "...\n");
 
-  CallAnalyzer CA(TD, *Callee, Threshold);
+  CallAnalyzer CA(TD, EV, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);
 
   DEBUG(CA.dump());
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -1898,6 +1898,7 @@
       // should be considered at least *safe* to speculate...
       case Intrinsic::dbg_declare:
       case Intrinsic::dbg_value:
+      case Intrinsic::invariant:
         return true;
 
       case Intrinsic::bswap:
Index: lib/CodeGen/IntrinsicLowering.cpp
===================================================================
--- lib/CodeGen/IntrinsicLowering.cpp
+++ lib/CodeGen/IntrinsicLowering.cpp
@@ -453,6 +453,7 @@
     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
     break;
 
+  case Intrinsic::invariant:
   case Intrinsic::var_annotation:
     break;   // Strip out annotate intrinsic
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5018,6 +5018,7 @@
     setValue(&I, Res);
     return 0;
   }
+  case Intrinsic::invariant:
   case Intrinsic::var_annotation:
     // Discard annotate attributes
     return 0;
Index: lib/Transforms/IPO/InlineSimple.cpp
===================================================================
--- lib/Transforms/IPO/InlineSimple.cpp
+++ lib/Transforms/IPO/InlineSimple.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "inline"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/EphemeralValues.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DataLayout.h"
@@ -42,6 +43,10 @@
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, getInlineThreshold(CS));
     }
+    virtual void getAnalysisUsage(AnalysisUsage &Info) const {
+      Info.addRequired<EphemeralValues>();
+      Inliner::getAnalysisUsage(Info);
+    }
     using llvm::Pass::doInitialization;
     virtual bool doInitialization(CallGraph &CG);
   };
@@ -51,6 +56,7 @@
 INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(EphemeralValues)
 INITIALIZE_PASS_END(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 
@@ -64,6 +70,12 @@
 // annotated with the noinline attribute.
 bool SimpleInliner::doInitialization(CallGraph &CG) {
   CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
+
+  // FIXME: We need to use getAnalysisIfAvailable instead of getAnalysis
+  // because, even though the pass has been required, it will not have been
+  // run. getAnalysisIfAvailable will run the pass now, while getAnalysis
+  // will not (and will assert instead).
+  CA.setEphemeralValues(getAnalysisIfAvailable<EphemeralValues>());
   return false;
 }
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -195,6 +195,8 @@
     MPM.add(createLoopUnrollPass());          // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
 
+  MPM.add(createAlignmentInvPropPass());      // Alignment invariants
+
   if (OptLevel > 1)
     MPM.add(createGVNPass());                 // Remove redundancies
   MPM.add(createMemCpyOptPass());             // Remove memcpy / form memset
Index: lib/Transforms/Scalar/ADCE.cpp
===================================================================
--- lib/Transforms/Scalar/ADCE.cpp
+++ lib/Transforms/Scalar/ADCE.cpp
@@ -23,6 +23,7 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstIterator.h"
@@ -49,6 +50,13 @@
 char ADCE::ID = 0;
 INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination",
                 false, false)
 
+static bool isInvariantIntrinsic(Instruction *I) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    return II->getIntrinsicID() == Intrinsic::invariant;
+
+  return false;
+}
+
 bool ADCE::runOnFunction(Function& F) {
   SmallPtrSet<Instruction*, 128> alive;
   SmallVector<Instruction*, 128> worklist;
 
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (isa<TerminatorInst>(I.getInstructionIterator()) ||
         isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+        isInvariantIntrinsic(I.getInstructionIterator()) ||
         isa<LandingPadInst>(I.getInstructionIterator()) ||
         I->mayHaveSideEffects()) {
       alive.insert(I.getInstructionIterator());
Index: lib/Transforms/Scalar/AlignmentInvProp.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/AlignmentInvProp.cpp
@@ -0,0 +1,386 @@
+//===------------------------ AlignmentInvProp.cpp ------------------------===//
+// Code to perform Alignment Invariant Propagation
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements alignment invariant propagation.
+//
+//===----------------------------------------------------------------------===//
+
+#define AA_NAME "alignment-inv-prop"
+#define DEBUG_TYPE AA_NAME
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constant.h"
+#include "llvm/Instruction.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+using namespace llvm;
+
+STATISTIC(NumLoadAlignChanged,
+  "Number of loads changed by alignment assumptions");
+STATISTIC(NumStoreAlignChanged,
+  "Number of stores changed by alignment assumptions");
+STATISTIC(NumMemIntAlignChanged,
+  "Number of memory intrinsics changed by alignment assumptions");
+
+namespace {
+  struct AlignmentInvProp : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    AlignmentInvProp() : FunctionPass(ID) {
+      initializeAlignmentInvPropPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<ScalarEvolution>();
+    }
+  };
+}
+
+char AlignmentInvProp::ID = 0;
+static const char aip_name[] = "Alignment invariant propagation";
+INITIALIZE_PASS_BEGIN(AlignmentInvProp, AA_NAME,
+                aip_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(AlignmentInvProp, AA_NAME,
+                aip_name, false, false)
+
+FunctionPass *llvm::createAlignmentInvPropPass() {
+  return new AlignmentInvProp();
+}
+
+static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
+                                    const SCEV *AlignSCEV,
+                                    ScalarEvolution *SE) {
+  // DiffUnits = Diff % int64_t(Alignment)
+  const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
+  const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
+  const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
+
+  DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
+    *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
+
+  if (const SCEVConstant *ConstDUSCEV =
+        dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
+    int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
+
+    if (!DiffUnits)
+      return (unsigned)
+        cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();
+
+    uint64_t DiffUnitsAbs = abs64(DiffUnits);
+    if (isPowerOf2_64(DiffUnitsAbs))
+      return (unsigned) DiffUnitsAbs;
+  }
+
+  return 0;
+}
+
+static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
+                                const SCEV *OffSCEV, Value *Ptr,
+                                ScalarEvolution *SE) {
+  const SCEV *PtrSCEV = SE->getSCEV(Ptr);
+  const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
+
+  // What we really want to know is the overall offset to the aligned
+  // address. This address is displaced by the provided offset.
+  DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
+
+  DEBUG(dbgs() << AA_NAME ": alignment of " << *Ptr << " relative to " <<
+    *AlignSCEV << " and offset " << *OffSCEV <<
+    " using diff " << *DiffSCEV << "\n");
+
+  unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
+  DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");
+
+  if (NewAlignment) {
+    return NewAlignment;
+  } else if (const SCEVAddRecExpr *DiffARSCEV =
+               dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
+    // The relative offset to the alignment assumption did not yield a
+    // constant, but we should try harder: if we assume that a is 32-byte
+    // aligned, then in 'for (i = 0; i < 1024; i += 4) r += a[i];' not all
+    // of the loads from a are 32-byte aligned; instead they alternate
+    // between 32 and 16-byte alignment. As a result, the new alignment will
+    // not be a constant, but can still be improved over the default (of 4)
+    // to 16.
+
+    const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
+    const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
+
+    DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
+      *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
+
+    NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
+    unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
+
+    DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
+    DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");
+
+    if (NewAlignment > NewIncAlignment) {
+      if (NewAlignment % NewIncAlignment == 0) {
+        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+          NewIncAlignment << "\n");
+        return NewIncAlignment;
+      }
+    } else if (NewIncAlignment > NewAlignment) {
+      if (NewIncAlignment % NewAlignment == 0) {
+        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+          NewAlignment << "\n");
+        return NewAlignment;
+      }
+    } else if (NewIncAlignment == NewAlignment && NewIncAlignment) {
+      DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+        NewAlignment << "\n");
+      return NewAlignment;
+    }
+  }
+
+  return 0;
+}
+
+bool AlignmentInvProp::runOnFunction(Function &F) {
+  SmallVector<Value *, 16> InvConds;
+  BasicBlock *EntryBB = F.begin();
+  for (df_iterator<BasicBlock *> I = df_begin(EntryBB), IE = df_end(EntryBB);
+       I != IE; ++I)
+    for (BasicBlock::iterator J = I->getFirstInsertionPt(), JE = I->end();
+         J != JE; ++J)
+      if (CallInst *CI = dyn_cast<CallInst>(J))
+        if (Function *F2 = CI->getCalledFunction())
+          if (F2->getIntrinsicID() == Intrinsic::invariant)
+            InvConds.push_back(CI->getArgOperand(0));
+
+  // Visit all invariant conditions, and split those that are ands of
+  // other conditions. Each condition is visited only once, so duplicate
+  // invariants cannot keep this loop from terminating.
+  DenseSet<Value *> VisitedInvCond;
+  for (unsigned Idx = 0; Idx != InvConds.size(); ++Idx) {
+    Value *V = InvConds[Idx];
+    if (!VisitedInvCond.insert(V).second)
+      continue;
+
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+      if (BO->getOpcode() == Instruction::And) {
+        InvConds.push_back(BO->getOperand(0));
+        InvConds.push_back(BO->getOperand(1));
+      }
+  }
+
+  bool Changed = false;
+  ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+
+  DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
+
+  for (SmallVector<Value *, 16>::iterator I = InvConds.begin(),
+       IE = InvConds.end(); I != IE; ++I) {
+    // An alignment invariant must be a statement about the least-significant
+    // bits of the pointer being zero, possibly with some offset.
+    ICmpInst *ICI = dyn_cast<ICmpInst>(*I);
+    if (!ICI)
+      continue;
+
+    // This must be an expression of the form: x & m == 0.
+    if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
+      continue;
+
+    Value *CmpLHS = ICI->getOperand(0);
+    Value *CmpRHS = ICI->getOperand(1);
+    const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
+    const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
+    if (CmpLHSSCEV->isZero())
+      std::swap(CmpLHS, CmpRHS);
+    else if (!CmpRHSSCEV->isZero())
+      continue;
+
+    BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
+    if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
+      continue;
+
+    Value *AndLHS = CmpBO->getOperand(0);
+    Value *AndRHS = CmpBO->getOperand(1);
+    const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
+    const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
+    if (isa<SCEVConstant>(AndLHSSCEV)) {
+      std::swap(AndLHS, AndRHS);
+      std::swap(AndLHSSCEV, AndRHSSCEV);
+    }
+
+    const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
+    if (!MaskSCEV)
+      continue;
+
+    unsigned TrailingOnes =
+      MaskSCEV->getValue()->getValue().countTrailingOnes();
+    if (!TrailingOnes)
+      continue;
+
+    uint64_t Alignment;
+    TrailingOnes = std::min(TrailingOnes,
+      unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+    Alignment = std::min(1u << TrailingOnes,
+                         Value::MaximumAlignment);
+
+    Type *Int64Ty = Type::getInt64Ty(F.getContext());
+    const SCEV *AlignSCEV = SE->getConstant(Int64Ty, Alignment);
+
+    // The LHS might be a ptrtoint instruction, or it might be the pointer
+    // with an offset.
+    Value *AAPtr = 0;
+    const SCEV *OffSCEV = 0;
+    if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
+      AAPtr = PToI->getPointerOperand();
+      OffSCEV = SE->getConstant(Int64Ty, 0);
+    } else if (const SCEVAddExpr* AndLHSAddSCEV =
+                 dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
+      // Try to find the ptrtoint; subtract it and the rest is the offset.
+      for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
+           JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
+        if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
+          if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
+            AAPtr = PToI->getPointerOperand();
+            OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
+            break;
+          }
+    }
+
+    if (!AAPtr)
+      continue;
+
+    unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
+    if (OffSCEVBits < 64)
+      OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
+    else if (OffSCEVBits > 64)
+      continue;
+
+    AAPtr = AAPtr->stripPointerCasts();
+    const SCEV *AASCEV = SE->getSCEV(AAPtr);
+
+    // Apply the assumption to all other users of the specified pointer.
+    DenseSet<Instruction *> Visited;
+    SmallVector<Instruction *, 16> WorkList;
+    for (Value::use_iterator J = AAPtr->use_begin(),
+         JE = AAPtr->use_end(); J != JE; ++J) {
+      if (*J == *I)
+        continue;
+
+      if (Instruction *K = dyn_cast<Instruction>(*J))
+        WorkList.push_back(K);
+    }
+
+    while (!WorkList.empty()) {
+      Instruction *J = WorkList.pop_back_val();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
+        unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          LI->getPointerOperand(), SE);
+
+        if (NewAlignment > LI->getAlignment()) {
+          LI->setAlignment(NewAlignment);
+          ++NumLoadAlignChanged;
+          Changed = true;
+        }
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
+        unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          SI->getPointerOperand(), SE);
+
+        if (NewAlignment > SI->getAlignment()) {
+          SI->setAlignment(NewAlignment);
+          ++NumStoreAlignChanged;
+          Changed = true;
+        }
+      } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
+        unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+          MI->getDest(), SE);
+
+        // For memory transfers, we need a common alignment for both the
+        // source and destination. If we have a new alignment for this
+        // instruction, but only for one operand, save it. If we reach the
+        // other operand through another assumption later, then we may
+        // change the alignment at that point.
+        if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+          unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV,
+            OffSCEV, MTI->getSource(), SE);
+
+          DenseMap<MemTransferInst *, unsigned>::iterator DI =
+            NewDestAlignments.find(MTI);
+          unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
+            0 : DI->second;
+
+          DenseMap<MemTransferInst *, unsigned>::iterator SI =
+            NewSrcAlignments.find(MTI);
+          unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
+            0 : SI->second;
+
+          DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
+            AltDestAlignment << " " << NewSrcAlignment <<
+            " " << AltSrcAlignment << "\n");
+
+          // Of these four alignments, pick the largest possible...
+          unsigned NewAlignment = 0;
+          if (NewDestAlignment <= NewSrcAlignment ||
+              NewDestAlignment <= AltSrcAlignment)
+            NewAlignment = std::max(NewAlignment, NewDestAlignment);
+          if (AltDestAlignment <= NewSrcAlignment ||
+              AltDestAlignment <= AltSrcAlignment)
+            NewAlignment = std::max(NewAlignment, AltDestAlignment);
+          if (NewSrcAlignment <= NewDestAlignment ||
+              NewSrcAlignment <= AltDestAlignment)
+            NewAlignment = std::max(NewAlignment, NewSrcAlignment);
+          if (AltSrcAlignment <= NewDestAlignment ||
+              AltSrcAlignment <= AltDestAlignment)
+            NewAlignment = std::max(NewAlignment, AltSrcAlignment);
+
+          if (NewAlignment > MI->getAlignment()) {
+            MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+              MI->getParent()->getContext()), NewAlignment));
+            ++NumMemIntAlignChanged;
+            Changed = true;
+          }
+
+          NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
+          NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
+        } else if (NewDestAlignment > MI->getAlignment()) {
+          MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+            MI->getParent()->getContext()), NewDestAlignment));
+          ++NumMemIntAlignChanged;
+          Changed = true;
+        }
+      }
+
+      Visited.insert(J);
+      for (Value::use_iterator UJ = J->use_begin(), UE = J->use_end();
+           UJ != UE; ++UJ) {
+        Instruction *K = cast<Instruction>(*UJ);
+        if (!Visited.count(K))
+          WorkList.push_back(K);
+      }
+    }
+  }
+
+  return Changed;
+}
+
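A quick worked example of the start/increment reasoning above, using the
loop tests added below: in @hoo, %a is asserted to be 32-byte aligned and
the load offsets follow the recurrence {0,+,32} (the induction variable
advances by eight i32 elements, or 32 bytes), so both the start and the
increment are 0 modulo 32 and the load alignment can be raised to 32. In
@joo, the induction variable starts at 4, so the offsets follow {16,+,32};
the start yields alignment 16 and the increment 32, and because 32 % 16 == 0
the pass settles on 16. In @koo, the increment is only 16 bytes ({0,+,16}),
which likewise limits the result to 16.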
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMScalarOpts
   ADCE.cpp
+  AlignmentInvProp.cpp
   BasicBlockPlacement.cpp
   CodeGenPrepare.cpp
   ConstantProp.cpp
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -28,6 +28,7 @@
 /// ScalarOpts library.
 void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeADCEPass(Registry);
+  initializeAlignmentInvPropPass(Registry);
   initializeBlockPlacementPass(Registry);
   initializeCodeGenPreparePass(Registry);
   initializeConstantPropagationPass(Registry);
@@ -76,6 +77,10 @@
   unwrap(PM)->add(createAggressiveDCEPass());
 }
 
+void LLVMAddAlignmentInvPropPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createAlignmentInvPropPass());
+}
+
 void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createCFGSimplificationPass());
 }
Index: lib/Transforms/Utils/Local.cpp
===================================================================
--- lib/Transforms/Utils/Local.cpp
+++ lib/Transforms/Utils/Local.cpp
@@ -286,6 +286,12 @@
     return true;
   }
 
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    // Invariants are dead if their condition is undef.
+    if (II->getIntrinsicID() == Intrinsic::invariant)
+      return isa<UndefValue>(II->getArgOperand(0));
+  }
+
   if (!I->mayHaveSideEffects()) return true;
 
   // Special case intrinsics that "may have side effects" but can be deleted
Index: test/Analysis/EphemeralValues/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Analysis/EphemeralValues/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
Index: test/Analysis/EphemeralValues/simple.ll
===================================================================
--- /dev/null
+++ test/Analysis/EphemeralValues/simple.ll
@@ -0,0 +1,98 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -analyze -eph-values | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: ephemeral: foo: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: foo: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: foo: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: foo: entry: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: ephemeral: foo2: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: foo2: entry: %offsetptr = add i64 %ptrint, 24
+; CHECK: ephemeral: foo2: entry: %maskedptr = and i64 %offsetptr, 31
+; CHECK: ephemeral: foo2: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: foo2: entry: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: ephemeral: hoo: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: hoo: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: hoo: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: hoo: for.end: tail call void @llvm.invariant(i1 %maskcond)
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.invariant(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: ephemeral: moo2: entry: %ptrint = ptrtoint i32* %a to i64
+; CHECK: ephemeral: moo2: entry: %maskedptr = and i64 %ptrint, 31
+; CHECK: ephemeral: moo2: entry: %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK: ephemeral: moo2: entry: tail call void @llvm.invariant(i1 %maskcond)
+; CHECK: ephemeral: moo2: entry: %ptrint1 = ptrtoint i32* %b to i64
+; CHECK: ephemeral: moo2: entry: %maskedptr3 = and i64 %ptrint1, 127
+; CHECK: ephemeral: moo2: entry: %maskcond4 = icmp eq i64 %maskedptr3, 0
+; CHECK: ephemeral: moo2: entry: tail call void @llvm.invariant(i1 %maskcond4)
+}
+
+declare void @llvm.invariant(i1) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+
Index: test/Transforms/AlignmentInvProp/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/AlignmentInvProp/lit.local.cfg
@@ -0,0 +1,2 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
Index: test/Transforms/AlignmentInvProp/simple.ll
===================================================================
--- /dev/null
+++ test/Transforms/AlignmentInvProp/simple.ll
@@ -0,0 +1,219 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -alignment-inv-prop -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = load i32* %a, align 4, !tbaa !0
+  ret i32 %0
+
+; CHECK: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  tail call void @llvm.invariant(i1 %maskcond)
+  ret i32 %add.lcssa
+
+; CHECK: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.invariant(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.invariant(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
}
+
+declare void @llvm.invariant(i1) nounwind readnone
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+
Index: test/Transforms/Inline/ephemeral.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/ephemeral.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %c = icmp eq i32 %x1, 0
+
+  %a2 = mul i32 %a1, %a1
+  %a3 = sub i32 %a1, 5
+  %a4 = udiv i32 %a3, -13
+  %a5 = mul i32 %a4, %a4
+  %a6 = add i32 %a5, %x1
+  %ca = icmp sgt i32 %a6, -7
+  tail call void @llvm.invariant(i1 %ca)
+
+  ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+  %r = call i1 @inner()
+  ret i1 %r
+}
+
+declare void @llvm.invariant(i1) nounwind readnone