Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -35,6 +35,9 @@ /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); +/** See llvm::createAlignmentFromAssumptionsPass function. */ +void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); + /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -73,6 +73,7 @@ void initializeArgPromotionPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeSampleProfileLoaderPass(PassRegistry&); +void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAliasAnalysisPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry &); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -52,6 +52,7 @@ (void) llvm::createAliasAnalysisCounterPass(); (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); + (void) llvm::createAlignmentFromAssumptionsPass(); (void) llvm::createBasicAliasAnalysisPass(); (void) llvm::createLibCallAliasAnalysisPass(nullptr); (void) llvm::createScalarEvolutionAliasAnalysisPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -36,6 +36,13 @@ //===----------------------------------------------------------------------===// // +// AlignmentFromAssumptions - Use assume intrinsics to set load/store +// alignments. +// +FunctionPass *createAlignmentFromAssumptionsPass(); + +//===----------------------------------------------------------------------===// +// // SCCP - Sparse conditional constant propagation. // FunctionPass *createSCCPPass(); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -310,6 +310,10 @@ if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops + // After vectorization and unrolling, assume intrinsics may tell us more + // about pointer alignments. + MPM.add(createAlignmentFromAssumptionsPass()); + if (!DisableUnitAtATime) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes @@ -399,6 +403,10 @@ // More scalar chains could be vectorized due to more alias information PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + // After vectorization, assume intrinsics may tell us more about pointer + // alignments. + PM.add(createAlignmentFromAssumptionsPass()); + if (LoadCombine) PM.add(createLoadCombinePass()); Index: lib/Transforms/Scalar/AlignmentFromAssumptions.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -0,0 +1,420 @@ +//===----------------------- AlignmentFromAssumptions.cpp -----------------===// +// Set Load/Store Alignments From Assumptions +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a ScalarEvolution-based transformation to set +// the alignments of load, stores and memory intrinsics based on the truth +// expressions of assume intrinsics. The primary motivation is to handle +// complex alignment assumptions that apply to vector loads and stores that +// appear after vectorization and unrolling. +// +//===----------------------------------------------------------------------===// + +#define AA_NAME "alignment-from-assumptions" +#define DEBUG_TYPE AA_NAME +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumLoadAlignChanged, + "Number of loads changed by alignment assumptions"); +STATISTIC(NumStoreAlignChanged, + "Number of stores changed by alignment assumptions"); +STATISTIC(NumMemIntAlignChanged, + "Number of memory intrinsics changed by alignment assumptions"); + +namespace { +struct AlignmentFromAssumptions : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AlignmentFromAssumptions() : FunctionPass(ID) { + initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + } + + // For memory transfers, we need a common alignment for both the source and + // destination. If we have a new alignment for only one operand of a transfer + // instruction, save it in these maps. If we reach the other operand through + // another assumption later, then we may change the alignment at that point. + DenseMap NewDestAlignments, NewSrcAlignments; + + AssumptionTracker *AT; + ScalarEvolution *SE; + DominatorTree *DT; + const DataLayout *DL; + + bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV, + const SCEV *&OffSCEV); + bool processAssumption(CallInst *I); +}; +} + +char AlignmentFromAssumptions::ID = 0; +static const char aip_name[] = "Alignment from assumptions"; +INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME, + aip_name, false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME, + aip_name, false, false) + +FunctionPass *llvm::createAlignmentFromAssumptionsPass() { + return new AlignmentFromAssumptions(); +} + +// Given an expression for the (constant) alignment, AlignSCEV, and an +// expression for the displacement between a pointer and the aligned address, +// DiffSCEV, compute the alignment of the displaced pointer if it can be +// reduced to a constant. +static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, + const SCEV *AlignSCEV, + ScalarEvolution *SE) { + // DiffUnits = Diff % int64_t(Alignment) + const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV); + const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV); + const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV); + + DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " << + *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n"); + + if (const SCEVConstant *ConstDUSCEV = + dyn_cast(DiffUnitsSCEV)) { + int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue(); + + // If the displacement is an exact multiple of the alignment, then the + // displaced pointer has the same alignment as the aligned pointer, so + // return the alignment value. + if (!DiffUnits) + return (unsigned) + cast(AlignSCEV)->getValue()->getSExtValue(); + + // If the displacement is not an exact multiple, but the remainder is a + // constant, then return this remainder (but only if it is a power of 2). + uint64_t DiffUnitsAbs = abs64(DiffUnits); + if (isPowerOf2_64(DiffUnitsAbs)) + return (unsigned) DiffUnitsAbs; + } + + return 0; +} + +// There is an address given by an offset OffSCEV from AASCEV which has an +// alignment AlignSCEV. Use that information, if possible, to compute a new +// alignment for Ptr. +static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV, + const SCEV *OffSCEV, Value *Ptr, + ScalarEvolution *SE) { + const SCEV *PtrSCEV = SE->getSCEV(Ptr); + const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV); + + // What we really want to know is the overall offset to the aligned + // address. This address is displaced by the provided offset. + DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV); + + DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to " << + *AlignSCEV << " and offset " << *OffSCEV << + " using diff " << *DiffSCEV << "\n"); + + unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE); + DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n"); + + if (NewAlignment) { + return NewAlignment; + } else if (const SCEVAddRecExpr *DiffARSCEV = + dyn_cast(DiffSCEV)) { + // The relative offset to the alignment assumption did not yield a constant, + // but we should try harder: if we assume that a is 32-byte aligned, then in + // for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are + // 32-byte aligned, but instead alternate between 32 and 16-byte alignment. + // As a result, the new alignment will not be a constant, but can still + // be improved over the default (of 4) to 16. + + const SCEV *DiffStartSCEV = DiffARSCEV->getStart(); + const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE); + + DEBUG(dbgs() << "\ttrying start/inc alignment using start " << + *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n"); + + // Now compute the new alignment using the displacement to the value in the + // first iteration, and also the alignment using the per-iteration delta. + // If these are the same, then use that answer. Otherwise, use the smaller + // one, but only if it divides the larger one. + NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE); + unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE); + + DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n"); + DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n"); + + if (NewAlignment > NewIncAlignment) { + if (NewAlignment % NewIncAlignment == 0) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewIncAlignment << "\n"); + return NewIncAlignment; + } + } else if (NewIncAlignment > NewAlignment) { + if (NewIncAlignment % NewAlignment == 0) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewAlignment << "\n"); + return NewAlignment; + } + } else if (NewIncAlignment == NewAlignment && NewIncAlignment) { + DEBUG(dbgs() << "\tnew start/inc alignment: " << + NewAlignment << "\n"); + return NewAlignment; + } + } + + return 0; +} + +bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I, + Value *&AAPtr, const SCEV *&AlignSCEV, + const SCEV *&OffSCEV) { + // An alignment assume must be a statement about the least-significant + // bits of the pointer being zero, possibly with some offset. + ICmpInst *ICI = dyn_cast(I->getArgOperand(0)); + if (!ICI) + return false; + + // This must be an expression of the form: x & m == 0. + if (ICI->getPredicate() != ICmpInst::ICMP_EQ) + return false; + + // Swap things around so that the RHS is 0. + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS); + const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS); + if (CmpLHSSCEV->isZero()) + std::swap(CmpLHS, CmpRHS); + else if (!CmpRHSSCEV->isZero()) + return false; + + BinaryOperator *CmpBO = dyn_cast(CmpLHS); + if (!CmpBO || CmpBO->getOpcode() != Instruction::And) + return false; + + // Swap things around so that the right operand of the and is a constant + // (the mask); we cannot deal with variable masks. + Value *AndLHS = CmpBO->getOperand(0); + Value *AndRHS = CmpBO->getOperand(1); + const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS); + const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS); + if (isa(AndLHSSCEV)) { + std::swap(AndLHS, AndRHS); + std::swap(AndLHSSCEV, AndRHSSCEV); + } + + const SCEVConstant *MaskSCEV = dyn_cast(AndRHSSCEV); + if (!MaskSCEV) + return false; + + // The mask must have some trailing ones (otherwise the condition is + // trivial and tells us nothing about the alignment of the left operand). + unsigned TrailingOnes = + MaskSCEV->getValue()->getValue().countTrailingOnes(); + if (!TrailingOnes) + return false; + + // Cap the alignment at the maximum with which LLVM can deal (and make sure + // we don't overflow the shift). + uint64_t Alignment; + TrailingOnes = std::min(TrailingOnes, + unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment); + + Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext()); + AlignSCEV = SE->getConstant(Int64Ty, Alignment); + + // The LHS might be a ptrtoint instruction, or it might be the pointer + // with an offset. + AAPtr = nullptr; + OffSCEV = nullptr; + if (PtrToIntInst *PToI = dyn_cast(AndLHS)) { + AAPtr = PToI->getPointerOperand(); + OffSCEV = SE->getConstant(Int64Ty, 0); + } else if (const SCEVAddExpr* AndLHSAddSCEV = + dyn_cast(AndLHSSCEV)) { + // Try to find the ptrtoint; subtract it and the rest is the offset. + for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), + JE = AndLHSAddSCEV->op_end(); J != JE; ++J) + if (const SCEVUnknown *OpUnk = dyn_cast(*J)) + if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) { + AAPtr = PToI->getPointerOperand(); + OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); + break; + } + } + + if (!AAPtr) + return false; + + // Sign extend the offset to 64 bits (so that it is like all of the other + // expressions). + unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); + if (OffSCEVBits < 64) + OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); + else if (OffSCEVBits > 64) + return false; + + AAPtr = AAPtr->stripPointerCasts(); + return true; +} + +bool AlignmentFromAssumptions::processAssumption(CallInst *I) { + Value *AAPtr; + const SCEV *AlignSCEV, *OffSCEV; + if (!extractAlignmentInfo(I, AAPtr, AlignSCEV, OffSCEV)) + return false; + + const SCEV *AASCEV = SE->getSCEV(AAPtr); + + // Apply the assumption to all other users of the specified pointer. + SmallPtrSet Visited; + SmallVector WorkList; + for (User *J : AAPtr->users()) { + if (J == I) + continue; + + if (Instruction *K = dyn_cast(J)) + if (isValidAssumeForContext(I, K, DL, DT)) + WorkList.push_back(K); + } + + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + if (LoadInst *LI = dyn_cast(J)) { + unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + LI->getPointerOperand(), SE); + + if (NewAlignment > LI->getAlignment()) { + LI->setAlignment(NewAlignment); + ++NumLoadAlignChanged; + } + } else if (StoreInst *SI = dyn_cast(J)) { + unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + SI->getPointerOperand(), SE); + + if (NewAlignment > SI->getAlignment()) { + SI->setAlignment(NewAlignment); + ++NumStoreAlignChanged; + } + } else if (MemIntrinsic *MI = dyn_cast(J)) { + unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + MI->getDest(), SE); + + // For memory transfers, we need a common alignment for both the + // source and destination. If we have a new alignment for this + // instruction, but only for one operand, save it. If we reach the + // other operand through another assumption later, then we may + // change the alignment at that point. + if (MemTransferInst *MTI = dyn_cast(MI)) { + unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, + MTI->getSource(), SE); + + DenseMap::iterator DI = + NewDestAlignments.find(MTI); + unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ? + 0 : DI->second; + + DenseMap::iterator SI = + NewSrcAlignments.find(MTI); + unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ? + 0 : SI->second; + + DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " << + AltDestAlignment << " " << NewSrcAlignment << + " " << AltSrcAlignment << "\n"); + + // If these four alignments, pick the largest possible... + unsigned NewAlignment = 0; + if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment)) + NewAlignment = std::max(NewAlignment, NewDestAlignment); + if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment)) + NewAlignment = std::max(NewAlignment, AltDestAlignment); + if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) + NewAlignment = std::max(NewAlignment, NewSrcAlignment); + if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) + NewAlignment = std::max(NewAlignment, AltSrcAlignment); + + if (NewAlignment > MI->getAlignment()) { + MI->setAlignment(ConstantInt::get(Type::getInt32Ty( + MI->getParent()->getContext()), NewAlignment)); + ++NumMemIntAlignChanged; + } + + NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment)); + NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment)); + } else if (NewDestAlignment > MI->getAlignment()) { + assert((!isa(MI) || isa(MI)) && + "Unknown memory intrinsic"); + + MI->setAlignment(ConstantInt::get(Type::getInt32Ty( + MI->getParent()->getContext()), NewDestAlignment)); + ++NumMemIntAlignChanged; + } + } + + // Now that we've updated that use of the pointer, look for other uses of + // the pointer to update. + Visited.insert(J); + for (User *UJ : J->users()) { + Instruction *K = cast(UJ); + if (!Visited.count(K) && isValidAssumeForContext(I, K, DL, DT)) + WorkList.push_back(K); + } + } + + return true; +} + +bool AlignmentFromAssumptions::runOnFunction(Function &F) { + bool Changed = false; + AT = &getAnalysis(); + SE = &getAnalysis(); + DT = &getAnalysis().getDomTree(); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + + NewDestAlignments.clear(); + NewSrcAlignments.clear(); + + for (auto &I : AT->assumptions(&F)) + Changed |= processAssumption(I); + + return Changed; +} + Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp + AlignmentFromAssumptions.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -28,6 +28,7 @@ /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); + initializeAlignmentFromAssumptionsPass(Registry); initializeSampleProfileLoaderPass(Registry); initializeConstantHoistingPass(Registry); initializeConstantPropagationPass(Registry); @@ -78,6 +79,10 @@ unwrap(PM)->add(createAggressiveDCEPass()); } +void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createAlignmentFromAssumptionsPass()); +} + void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCFGSimplificationPass()); } Index: test/Transforms/AlignmentFromAssumptions/simple.ll =================================================================== --- /dev/null +++ test/Transforms/AlignmentFromAssumptions/simple.ll @@ -0,0 +1,215 @@ +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s + +define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %0 = load i32* %a, align 4 + ret i32 %0 + +; CHECK-LABEL: @foo +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %offsetptr = add i64 %ptrint, 24 + %maskedptr = and i64 %offsetptr, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %arrayidx = getelementptr inbounds i32* %a, i64 2 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 + +; CHECK-LABEL: @foo2 +; CHECK: load i32* {{[^,]+}}, align 16 +; CHECK: ret i32 +} + +define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %offsetptr = add i64 %ptrint, 28 + %maskedptr = and i64 %offsetptr, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %arrayidx = getelementptr inbounds i32* %a, i64 -1 + %0 = load i32* %arrayidx, align 4 + ret i32 %0 + +; CHECK-LABEL: @foo2a +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %0 = load i32* %a, align 4 + ret i32 %0 + +; CHECK-LABEL: @goo +; CHECK: load i32* {{[^,]+}}, align 32 +; CHECK: ret i32 +} + +define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 8 + %1 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %1, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK-LABEL: @hoo +; CHECK: load i32* %arrayidx, align 32 +; CHECK: ret i32 %add.lcssa +} + +define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 8 + %1 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %1, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK-LABEL: @joo +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 4 + %1 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %1, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK-LABEL: @koo +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ] + %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.06 + %indvars.iv.next = add i64 %indvars.iv, 4 + %1 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %1, 2048 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %add.lcssa = phi i32 [ %add, %for.body ] + ret i32 %add.lcssa + +; CHECK-LABEL: @koo2 +; CHECK: load i32* %arrayidx, align 16 +; CHECK: ret i32 %add.lcssa +} + +define i32 @moo(i32* nocapture %a) nounwind uwtable { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %0 = bitcast i32* %a to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false) + ret i32 undef + +; CHECK-LABEL: @moo +; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false) +; CHECK: ret i32 undef +} + +define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { +entry: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %ptrint1 = ptrtoint i32* %b to i64 + %maskedptr3 = and i64 %ptrint1, 127 + %maskcond4 = icmp eq i64 %maskedptr3, 0 + tail call void @llvm.assume(i1 %maskcond4) + %0 = bitcast i32* %a to i8* + %1 = bitcast i32* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false) + ret i32 undef + +; CHECK-LABEL: @moo2 +; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false) +; CHECK: ret i32 undef +} + +declare void @llvm.assume(i1) nounwind + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +