Index: docs/LangRef.html
===================================================================
--- docs/LangRef.html
+++ docs/LangRef.html
@@ -329,6 +329,8 @@
  • 'llvm.objectsize' Intrinsic
+ • 'llvm.assume.aligned' Intrinsic
  • 'llvm.expect' Intrinsic
  • 'llvm.donothing' Intrinsic

@@ -9105,6 +9107,49 @@

'llvm.assume.aligned' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.assume.aligned on pointers
in any address space and with any integer bit width for the offset.

  declare i8* @llvm.assume.aligned.p0i8.i64(i8* <addr>, i32 <align>, i64 <offset>)

Overview:

The 'llvm.assume.aligned.*' intrinsics allow the optimizer to assume that the
returned address (which references the same memory location as the address
provided) lies the given number of bytes past an address with the specified
alignment.

Arguments:

The first argument is the pointer about which the alignment assumption is
made. This pointer is also the return value of the intrinsic, and the
assumption applies to uses of the return value.

The second argument is the alignment, in bytes.

The third argument is the offset of the provided address from an address with
the specified alignment. For example, if an alignment of 32 and an offset of 8
are specified, then the return value minus 8 bytes is assumed to be 32-byte
aligned.

Semantics:

The optimizer may use this assumption to override the alignments specified on
loads, stores, memory intrinsics, and other alignment-sensitive instructions
that use the returned address. This applies not only to direct uses of the
returned address, but also to uses of addresses derived from it.
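For illustration, here is a minimal IR sketch, modeled on the @foo test added
later in this patch (the names @example, %p, %q, and %v are illustrative only).
It asserts that the incoming pointer is 32-byte aligned, which lets the new
-assumealign pass raise the load's alignment:

  define i32 @example(i32* %a) nounwind readonly {
  entry:
    %p = bitcast i32* %a to i8*
    ; assume %p itself (offset 0) is 32-byte aligned
    %aligned = call i8* @llvm.assume.aligned.p0i8.i64(i8* %p, i32 32, i64 0)
    %q = bitcast i8* %aligned to i32*
    ; -assumealign may raise this load's alignment from 4 to 32 and then
    ; remove the assume.aligned call, forwarding its pointer operand
    %v = load i32* %q, align 4
    ret i32 %v
  }

  declare i8* @llvm.assume.aligned.p0i8.i64(i8*, i32, i64) nounwind readnone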

    'llvm.donothing' Intrinsic

    Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -35,6 +35,9 @@ /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); +/** See llvm::createAssumeAlignPass function. */ +void LLVMAddAssumeAlignPass(LLVMPassManagerRef PM); + /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -66,6 +66,7 @@ void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); +void initializeAssumeAlignPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); void initializeBasicCallGraphPass(PassRegistry&); Index: include/llvm/Intrinsics.td =================================================================== --- include/llvm/Intrinsics.td +++ include/llvm/Intrinsics.td @@ -230,6 +230,9 @@ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem, NoCapture<0>]>; +def int_assume_aligned : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, llvm_i32_ty, + llvm_anyint_ty], [IntrNoMem]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -50,6 +50,7 @@ (void) llvm::createAliasAnalysisCounterPass(); (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); + (void) llvm::createAssumeAlignPass(); (void) llvm::createBasicAliasAnalysisPass(); (void) llvm::createLibCallAliasAnalysisPass(0); (void) llvm::createScalarEvolutionAliasAnalysisPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -32,6 +32,12 @@ //===----------------------------------------------------------------------===// // +// AssumeAlign - A worklist driven alignment assumption propagation pass +// +FunctionPass *createAssumeAlignPass(); + +//===----------------------------------------------------------------------===// +// // SCCP - Sparse conditional constant propagation. // FunctionPass *createSCCPPass(); Index: lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- lib/Analysis/BasicAliasAnalysis.cpp +++ lib/Analysis/BasicAliasAnalysis.cpp @@ -217,6 +217,21 @@ return V; } +static const Value *getAssumeAlignedPtr(const Value *V) { + const CallInst *CI = dyn_cast(V); + if (!CI) + return 0; + + const Function *F = CI->getCalledFunction(); + if (!F) + return 0; + + if (F->getIntrinsicID() == Intrinsic::assume_aligned) + return CI->getArgOperand(0); + + return 0; +} + /// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it /// into a base pointer with a constant offset and a number of scaled symbolic /// offsets. @@ -239,6 +254,11 @@ BaseOffs = 0; do { + if (const Value *V2 = getAssumeAlignedPtr(V)) { + V = V2; + continue; + } + // See if this is a bitcast or GEP. 
const Operator *Op = dyn_cast(V); if (Op == 0) { @@ -1158,6 +1178,21 @@ return Alias; } +static const Value *stripPointerCasts(const Value *V) { + bool Done; + do { + V = V->stripPointerCasts(); + if (const Value *V2 = getAssumeAlignedPtr(V)) { + V = V2; + Done = false; + } else { + Done = true; + } + } while (!Done); + + return V; +} + // aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, // such as array references. // @@ -1172,8 +1207,8 @@ return NoAlias; // Strip off any casts if they exist. - V1 = V1->stripPointerCasts(); - V2 = V2->stripPointerCasts(); + V1 = stripPointerCasts(V1); + V2 = stripPointerCasts(V2); // Are we checking for alias of the same value? if (V1 == V2) return MustAlias; Index: lib/Analysis/CaptureTracking.cpp =================================================================== --- lib/Analysis/CaptureTracking.cpp +++ lib/Analysis/CaptureTracking.cpp @@ -16,6 +16,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/CaptureTracking.h" @@ -108,34 +111,31 @@ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) break; - // Not captured if only passed via 'nocapture' arguments. Note that - // calling a function pointer does not in itself cause the pointer to - // be captured. This is a subtle point considering that (for example) - // the callee might return its own address. It is analogous to saying - // that loading a value from a pointer does not cause the pointer to be - // captured, even though the loaded value might be the pointer itself - // (think of self-referential objects). - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.doesNotCapture(A - B)) - // The parameter is not marked 'nocapture' - captured. - if (Tracker->captured(U)) - return; - break; + bool IsAssumeAligned = false; + if (Function *F = CS.getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::assume_aligned) + IsAssumeAligned = true; + + if (!IsAssumeAligned) { + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.doesNotCapture(A - B)) + // The parameter is not marked 'nocapture' - captured. + if (Tracker->captured(U)) + return; + + break; + } + + // fallthrough; treat assume_aligned like a bitcast } - case Instruction::Load: - // Loading from a pointer does not cause it to be captured. - break; - case Instruction::VAArg: - // "va-arg" from a pointer does not cause it to be captured. - break; - case Instruction::Store: - if (V == I->getOperand(0)) - // Stored the pointer - conservatively assume it may be captured. - if (Tracker->captured(U)) - return; - // Storing to the pointee does not cause the pointer to be captured. 
- break; case Instruction::BitCast: case Instruction::GetElementPtr: case Instruction::PHI: @@ -149,6 +149,19 @@ Worklist.push_back(U); } break; + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - conservatively assume it may be captured. + if (Tracker->captured(U)) + return; + // Storing to the pointee does not cause the pointer to be captured. + break; case Instruction::ICmp: // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results Index: lib/Analysis/CodeMetrics.cpp =================================================================== --- lib/Analysis/CodeMetrics.cpp +++ lib/Analysis/CodeMetrics.cpp @@ -67,6 +67,7 @@ switch (II->getIntrinsicID()) { default: return false; + case Intrinsic::assume_aligned: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::invariant_start: Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -19,6 +19,9 @@ #define DEBUG_TYPE "instsimplify" #include "llvm/GlobalAlias.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" #include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SetVector.h" @@ -690,6 +693,21 @@ return true; } +static Value *getAssumeAlignedPtr(Value *V) { + CallInst *CI = dyn_cast(V); + if (!CI) + return 0; + + Function *F = CI->getCalledFunction(); + if (!F) + return 0; + + if (F->getIntrinsicID() == Intrinsic::assume_aligned) + return CI->getArgOperand(0); + + return 0; +} + /// \brief Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and @@ -715,6 +733,8 @@ V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); + } else if (Value *V2 = getAssumeAlignedPtr(V)) { + V = V2; } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->mayBeOverridden()) break; Index: lib/Analysis/Loads.cpp =================================================================== --- lib/Analysis/Loads.cpp +++ lib/Analysis/Loads.cpp @@ -48,6 +48,21 @@ return false; } +static Value *getAssumeAlignedPtr(Value *V) { + CallInst *CI = dyn_cast(V); + if (!CI) + return 0; + + Function *F = CI->getCalledFunction(); + if (!F) + return 0; + + if (F->getIntrinsicID() == Intrinsic::assume_aligned) + return CI->getArgOperand(0); + + return 0; +} + /// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and /// bitcasts to get back to the underlying object being addressed, keeping /// track of the offset in bytes from the GEPs relative to the result. 
@@ -68,6 +83,8 @@ V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); + } else if (Value *V2 = getAssumeAlignedPtr(V)) { + V = V2; } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->mayBeOverridden()) return V; Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -62,9 +62,12 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/GlobalAlias.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" @@ -3808,6 +3811,16 @@ case Instruction::SExt: return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + case Instruction::Call: + if (isSCEVable(V->getType())) { + // calls to assume_align are treated like no-op casts. + CallInst *CI = cast(V); + Function *F = CI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::assume_aligned) + return getSCEV(CI->getArgOperand(0)); + } + + break; case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -1585,6 +1585,21 @@ return 0; } +static Value *getAssumeAlignedPtr(Value *V) { + CallInst *CI = dyn_cast(V); + if (!CI) + return 0; + + Function *F = CI->getCalledFunction(); + if (!F) + return 0; + + if (F->getIntrinsicID() == Intrinsic::assume_aligned) + return CI->getArgOperand(0); + + return 0; +} + /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. @@ -1597,6 +1612,9 @@ // Just look through bitcasts. if (PtrOp->getOpcode() == Instruction::BitCast) return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + if (Value *P2 = getAssumeAlignedPtr(Ptr)) + return GetPointerBaseWithConstantOffset(P2, Offset, TD); // If this is a GEP with constant indices, we can look through it. GEPOperator *GEP = dyn_cast(PtrOp); @@ -1787,6 +1805,8 @@ if (GA->mayBeOverridden()) return V; V = GA->getAliasee(); + } else if (Value *VP = getAssumeAlignedPtr(V)) { + V = VP; } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) @@ -1896,6 +1916,7 @@ // information about their operands. // FIXME: There are other no-op synthetic instructions that potentially // should be considered at least *safe* to speculate... + case Intrinsic::assume_aligned: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: return true; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5060,6 +5060,7 @@ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; + case Intrinsic::assume_aligned: case Intrinsic::expect: { // Just replace __builtin_expect(exp, c) with EXP. 
setValue(&I, getValue(I.getArgOperand(0))); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -199,6 +199,8 @@ MPM.add(createLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + MPM.add(createAssumeAlignPass()); // Alignment assumptions + if (OptLevel > 1) MPM.add(createGVNPass()); // Remove redundancies MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset Index: lib/Transforms/Scalar/AssumeAlign.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/AssumeAlign.cpp @@ -0,0 +1,303 @@ +//===- AssumeAlign.cpp - Code to perform Simple Constant Propagation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements alignment assumption propagation. +// +//===----------------------------------------------------------------------===// + +#define AA_NAME "assumealign" +#define DEBUG_TYPE AA_NAME +#include "llvm/Transforms/Scalar.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constant.h" +#include "llvm/Instruction.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/DataLayout.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +using namespace llvm; + +STATISTIC(NumLoadAlignChanged, + "Number of loads changed by alignment assumptions"); +STATISTIC(NumStoreAlignChanged, + "Number of stores changed by alignment assumptions"); +STATISTIC(NumMemIntAlignChanged, + "Number of memory intrinsics changed by alignment assumptions"); + +namespace { + struct AssumeAlign : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AssumeAlign() : FunctionPass(ID) { + initializeAssumeAlignPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + } + }; +} + +char AssumeAlign::ID = 0; +static const char assumealign_name[] = "Alignment assumption propagation"; +INITIALIZE_PASS_BEGIN(AssumeAlign, AA_NAME, + assumealign_name, false, false) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(AssumeAlign, AA_NAME, + assumealign_name, false, false) + +FunctionPass *llvm::createAssumeAlignPass() { + return new AssumeAlign(); +} + +static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, + const SCEV *AlignSCEV, + ScalarEvolution *SE) { + // DiffUnits = Diff % int64_t(Alignment) + const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV); + const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV); + const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV); + + DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " << + *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n"); + + if (const SCEVConstant *ConstDUSCEV = + dyn_cast(DiffUnitsSCEV)) { + int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue(); + 
+ if (!DiffUnits) + return (unsigned) + cast(AlignSCEV)->getValue()->getSExtValue(); + + uint64_t DiffUnitsAbs = abs64(DiffUnits); + if (isPowerOf2_64(DiffUnitsAbs)) + return (unsigned) DiffUnitsAbs; + } + + return 0; +} + +static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV, + const SCEV *OffSCEV, Value *Ptr, + ScalarEvolution *SE) { + const SCEV *PtrSCEV = SE->getSCEV(Ptr); + const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV); + + // What we really want to know if the overall offset to the aligned + // address. This address is displaced by the provided offset. + DiffSCEV = SE->getAddExpr(DiffSCEV, OffSCEV); + + DEBUG(dbgs() << "assumealign: alignment of " << *Ptr << " relative to " << + *AlignSCEV << " and offset " << *OffSCEV << + " using diff " << *DiffSCEV << "\n"); + + unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE); + DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n"); + + if (NewAlignment) { + return NewAlignment; + } else if (const SCEVAddRecExpr *DiffARSCEV = + dyn_cast(DiffSCEV)) { + // The relative offset to the alignment assumption did not yield a constant, + // but we should try harder: if we assume that a is 32-byte aligned, then in + // for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are + // 32-byte aligned, but instead alternate between 32 and 16-byte alignment. + // As a result, the new alignment will not be a constant, but can still + // be improved over the default (of 4) to 16. + + const SCEV *DiffStartSCEV = DiffARSCEV->getStart(); + const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE); + + DEBUG(dbgs() << "\ttrying start/inc alignment using start " << + *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n"); + + NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE); + unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE); + + DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n"); + DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n"); + + if (NewAlignment > NewIncAlignment) { + if (NewAlignment % NewIncAlignment == 0) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewIncAlignment << "\n"); + return NewIncAlignment; + } + } else if (NewIncAlignment > NewAlignment) { + if (NewIncAlignment % NewAlignment == 0) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewAlignment << "\n"); + return NewAlignment; + } + } else if (NewIncAlignment == NewAlignment && NewIncAlignment) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewAlignment << "\n"); + return NewAlignment; + } + } + + return 0; +} + +bool AssumeAlign::runOnFunction(Function &F) { + SmallVector AACalls; + BasicBlock *EntryBB = F.begin(); + for (df_iterator I = df_begin(EntryBB), IE = df_end(EntryBB); I != IE; ++I) + for (BasicBlock::iterator J = I->getFirstInsertionPt(), JE = I->end(); + J != JE; ++J) + if (CallInst *CI = dyn_cast(J)) + if (Function *F2 = CI->getCalledFunction()) + if (F2->getIntrinsicID() == Intrinsic::assume_aligned) + AACalls.push_back(CI); + + bool Changed = false; + ScalarEvolution *SE = &getAnalysis(); + + DenseMap NewDestAlignments, NewSrcAlignments; + + for (SmallVector::iterator I = AACalls.begin(), + IE = AACalls.end(); I != IE; ++I) { + const SCEV *AASCEV = SE->getSCEV(*I); + + Value *AAPtr = (*I)->getArgOperand(0); + Value *AAAlign = (*I)->getArgOperand(1); + Value *AAOffset = (*I)->getArgOperand(2); + + Type *Int64Ty = Type::getInt64Ty(F.getContext()); + uint64_t Alignment; + if (ConstantInt *CI = dyn_cast(AAAlign)) { 
+ Alignment = CI->getZExtValue(); + } else { + continue; + } + const SCEV *AlignSCEV = SE->getConstant(Int64Ty, Alignment); + + const SCEV *OffSCEV = SE->getSCEV(AAOffset); + unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); + if (OffSCEVBits < 64) + OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); + else if (OffSCEVBits > 64) + continue; + + DenseSet Visited; + SmallVector WorkList; + for (Value::use_iterator J = (*I)->use_begin(), + JE = (*I)->use_end(); J != JE; ++J) + if (Instruction *K = dyn_cast(*J)) + WorkList.push_back(K); + + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + if (LoadInst *LI = dyn_cast(J)) { + unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + LI->getPointerOperand(), SE); + + if (NewAlignment > LI->getAlignment()) { + LI->setAlignment(NewAlignment); + ++NumLoadAlignChanged; + Changed = true; + } + } else if (StoreInst *SI = dyn_cast(J)) { + unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + SI->getPointerOperand(), SE); + + if (NewAlignment > SI->getAlignment()) { + SI->setAlignment(NewAlignment); + ++NumStoreAlignChanged; + Changed = true; + } + } else if (MemIntrinsic *MI = dyn_cast(J)) { + unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + MI->getDest(), SE); + + // For memory transfers, we need a common alignment for both the + // source and destination. If we have a new alignment for this + // instruction, but only for one operand, save it. If we reach the + // other operand through another assumption later, then we may + // change the alignment at that point. + if (MemTransferInst *MTI = dyn_cast(MI)) { + unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + MTI->getSource(), SE); + + DenseMap::iterator DI = + NewDestAlignments.find(MTI); + unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ? + 0 : DI->second; + + DenseMap::iterator SI = + NewSrcAlignments.find(MTI); + unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ? + 0 : SI->second; + + DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " << + AltDestAlignment << " " << NewSrcAlignment << + " " << AltSrcAlignment << "\n"); + + // If these four alignments, pick the largest possible... 
+ unsigned NewAlignment = 0; + if (NewDestAlignment <= NewSrcAlignment || + NewDestAlignment <= AltSrcAlignment) + NewAlignment = std::max(NewAlignment, NewDestAlignment); + if (AltDestAlignment <= NewSrcAlignment || + AltDestAlignment <= AltSrcAlignment) + NewAlignment = std::max(NewAlignment, AltDestAlignment); + if (NewSrcAlignment <= NewDestAlignment || + NewSrcAlignment <= AltDestAlignment) + NewAlignment = std::max(NewAlignment, NewSrcAlignment); + if (AltSrcAlignment <= NewDestAlignment || + AltSrcAlignment <= AltDestAlignment) + NewAlignment = std::max(NewAlignment, AltSrcAlignment); + + if (NewAlignment > MI->getAlignment()) { + MI->setAlignment(ConstantInt::get(Type::getInt32Ty( + MI->getParent()->getContext()), NewAlignment)); + ++NumMemIntAlignChanged; + Changed = true; + } + + NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment)); + NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment)); + } else if (NewDestAlignment > MI->getAlignment()) { + MI->setAlignment(ConstantInt::get(Type::getInt32Ty( + MI->getParent()->getContext()), NewDestAlignment)); + ++NumMemIntAlignChanged; + Changed = true; + } + } + + Visited.insert(J); + for (Value::use_iterator UJ = J->use_begin(), UE = J->use_end(); + UJ != UE; ++UJ) { + Instruction *K = cast(*UJ); + if (!Visited.count(K)) + WorkList.push_back(cast(*UJ)); + } + } + + // Now that the alignment assumption has been processed, remove it. + (*I)->replaceAllUsesWith(AAPtr); + (*I)->eraseFromParent(); + + Changed = true; + } + + return Changed; +} + Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp + AssumeAlign.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -28,6 +28,7 @@ /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); + initializeAssumeAlignPass(Registry); initializeBlockPlacementPass(Registry); initializeCodeGenPreparePass(Registry); initializeConstantPropagationPass(Registry); @@ -76,6 +77,10 @@ unwrap(PM)->add(createAggressiveDCEPass()); } +void LLVMAddAssumeAlignPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAssumeAlignPass()); +} + void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCFGSimplificationPass()); } Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -3890,6 +3890,13 @@ if (BitCastInst *BC = dyn_cast(Use)) return passingValueIsAlwaysUndefined(V, BC); + // assume_aligned is like a bitcast. + if (CallInst *CI = dyn_cast(Use)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::assume_aligned && + CI->getArgOperand(0) == I) + return passingValueIsAlwaysUndefined(V, CI); + // Load from null is undefined. 
if (LoadInst *LI = dyn_cast(Use)) return LI->getPointerAddressSpace() == 0; Index: test/Analysis/BasicAA/assume-aligned.ll =================================================================== --- /dev/null +++ test/Analysis/BasicAA/assume-aligned.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +; Check that geps with equal base offsets of noalias base pointers stay noalias. +; (make sure that basicaa can see though assume.aligned intrinsics calls) +define i32 @test(i32* %p, i16 %i) { + %pi = getelementptr i32* %p, i32 0 + %pi.next = getelementptr i32* %p, i32 1 + %b = icmp eq i16 %i, 0 + br i1 %b, label %bb1, label %bb2 + +bb1: + %f = getelementptr i32* %pi, i32 1 + %g = getelementptr i32* %pi.next, i32 1 + br label %bb3 +bb2: + %f2a = getelementptr i32* %pi, i32 1 + %g2a = getelementptr i32* %pi.next, i32 1 + + %f2 = call i32* @llvm.assume.aligned.p0i32.i64(i32* %f2a, i32 32, i64 0) + %g2 = call i32* @llvm.assume.aligned.p0i32.i64(i32* %g2a, i32 32, i64 0) + + br label %bb3 + +bb3: + %ptr_phi = phi i32* [ %f, %bb1 ], [ %f2, %bb2 ] + %ptr_phi2 = phi i32* [ %g, %bb1 ], [ %g2, %bb2 ] +; CHECK: NoAlias: i32* %f1, i32* %g1 + %f1 = getelementptr i32* %ptr_phi , i32 1 + %g1 = getelementptr i32* %ptr_phi2 , i32 1 + + ret i32 0 +} + +declare i32* @llvm.assume.aligned.p0i32.i64(i32*, i32, i64) nounwind readnone + Index: test/Transforms/AssumeAlign/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/AssumeAlign/lit.local.cfg @@ -0,0 +1,2 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + Index: test/Transforms/AssumeAlign/simple.ll =================================================================== --- /dev/null +++ test/Transforms/AssumeAlign/simple.ll @@ -0,0 +1,194 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -assumealign -S | FileCheck %s + +define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %0, i32 32, i64 0) + %1 = bitcast i8* %alignedval to i32* + %2 = load i32* %1, align 4 + ret i32 %2 + +; CHECK: @foo +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i32(i8* %0, i32 32, i32 8) + %arrayidx = getelementptr inbounds i8* %alignedval, i64 8 + %1 = bitcast i8* %arrayidx to i32* + %2 = load i32* %1, align 4 + ret i32 %2 + +; CHECK: @foo2 +; CHECK: load i32* {{[^,]+}}, align 16 +; CHECK: ret i32 +} + +define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i32(i8* %0, i32 32, i32 4) + %arrayidx = getelementptr inbounds i8* %alignedval, i64 -4 + %1 = bitcast i8* %arrayidx to i32* + %2 = load i32* %1, align 4 + ret i32 %2 + +; CHECK: @foo2a +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i32(i8* %0, i32 32, i32 0) + %1 = bitcast i8* 
%alignedval to i32* + %2 = load i32* %1, align 4 + ret i32 %2 + +; CHECK: @goo +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %0, i32 32, i64 0) + %1 = bitcast i8* %alignedval to i32* + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %1, i64 %indvars.iv + %2 = load i32* %arrayidx, align 4 + %add = add nsw i32 %2, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 8 + %3 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %3, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK: @hoo +; CHECK: load i32* %arrayidx, align 32 +; CHECK: ret i32 %add.lcssa +} + +define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %0, i32 32, i64 0) + %1 = bitcast i8* %alignedval to i32* + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %1, i64 %indvars.iv + %2 = load i32* %arrayidx, align 4 + %add = add nsw i32 %2, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 8 + %3 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %3, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK: @joo +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %0, i32 32, i64 0) + %1 = bitcast i8* %alignedval to i32* + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %1, i64 %indvars.iv + %2 = load i32* %arrayidx, align 4 + %add = add nsw i32 %2, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 4 + %3 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %3, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa +; CHECK: @koo +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { +entry: + %0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %0, i32 32, i64 0) + %1 = bitcast i8* %alignedval to i32* + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %1, i64 %indvars.iv + %2 = load i32* %arrayidx, align 4 + %add = add nsw i32 %2, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 4 + %3 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %3, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = 
phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK: @koo2 +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @moo(i32* nocapture %a) nounwind uwtable { +entry: + %q = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %q, i32 32, i64 0) + tail call void @llvm.memset.p0i8.i64(i8* %alignedval, i8 0, i64 64, i32 4, i1 false) + ret i32 undef +; CHECK: @moo +; CHECK: @llvm.memset.p0i8.i64(i8* %q, i8 0, i64 64, i32 32, i1 false) +; CHECK: ret i32 undef +} + +define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { +entry: + %i0 = bitcast i32* %a to i8* + %alignedval = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %i0, i32 32, i64 0) + %i1 = bitcast i32* %b to i8* + %alignedval1 = tail call i8* @llvm.assume.aligned.p0i8.i64(i8* %i1, i32 128, i64 0) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %alignedval, i8* %alignedval1, i64 64, i32 4, i1 false) + ret i32 undef +; CHECK: @moo2 +; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %i0, i8* %i1, i64 64, i32 32, i1 false) +; CHECK: ret i32 undef +} + +declare i8* @llvm.assume.aligned.p0i8.i64(i8*, i32, i64) nounwind readnone +declare i8* @llvm.assume.aligned.p0i8.i32(i8*, i32, i32) nounwind readnone + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +
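As a closing usage note, here is a sketch of the rewrite that the @moo2 test
above checks for: when both operands of a memory transfer carry alignment
assumptions, the pass picks the largest alignment valid for both operands,
rewrites the operands to the original pointers, and erases the assume calls.

  ; RUN with:  opt < input.ll -assumealign -S
  ;
  ; Before (inside @moo2): the destination is assumed 32-byte aligned and the
  ; source 128-byte aligned, but the memcpy itself only claims align 4:
  ;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %alignedval, i8* %alignedval1, i64 64, i32 4, i1 false)
  ;
  ; After: the common alignment is 32, and the assume.aligned calls are gone:
  ;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i0, i8* %i1, i64 64, i32 32, i1 false)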