Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -311,6 +311,7 @@ void initializeFuncletLayoutPass(PassRegistry &); void initializeLoopLoadEliminationPass(PassRegistry&); void initializeFunctionImportPassPass(PassRegistry &); +void initializeLoopVersioningPassPass(PassRegistry &); } #endif Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -493,6 +493,12 @@ // FunctionPass *createLoopLoadEliminationPass(); +//===----------------------------------------------------------------------===// +// +// LoopVersioning - Perform loop multi-versioning. +// +FunctionPass *createLoopVersioningPass(); + } // End llvm namespace #endif Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -84,6 +84,7 @@ initializeFloat2IntPass(Registry); initializeLoopDistributePass(Registry); initializeLoopLoadEliminationPass(Registry); + initializeLoopVersioningPassPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { Index: lib/Transforms/Utils/LoopVersioning.cpp =================================================================== --- lib/Transforms/Utils/LoopVersioning.cpp +++ lib/Transforms/Utils/LoopVersioning.cpp @@ -145,3 +145,77 @@ PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock()); } } + +namespace { +/// \brief Also expose this is a pass. Currently this is only used for +/// unit-testing. It adds all memchecks necessary to remove all may-aliasing +/// array accesses from the loop. +class LoopVersioningPass : public FunctionPass { +public: + LoopVersioningPass() : FunctionPass(ID) { + initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *LI = &getAnalysis().getLoopInfo(); + auto *LAA = &getAnalysis(); + auto *DT = &getAnalysis().getDomTree(); + auto *SE = &getAnalysis().getSE(); + + // Build up a worklist of inner-loops to version. This is necessary as the + // act of versioning a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->empty()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap()); + if (LAI.getNumRuntimePointerChecks() || + !LAI.PSE.getUnionPredicate().isAlwaysTrue()) { + LoopVersioning LVer(LAI, L, LI, DT, SE); + LVer.versionLoop(); + Changed = true; + } + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + } + + static char ID; +}; +} + +#define LVER_OPTION "loop-versioning" +#define DEBUG_TYPE LVER_OPTION + +char LoopVersioningPass::ID; +static const char LVer_name[] = "Loop Versioning"; + +INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false) + +namespace llvm { +FunctionPass *createLoopVersioningPass() { + return new LoopVersioningPass(); +} +} Index: test/Transforms/LoopVersioning/basic.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVersioning/basic.ll @@ -0,0 +1,47 @@ +; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Version this loop with overlap checks between a, c and b, c. + +define void @f(i32* %a, i32* %b, i32* %c) { +entry: + br label %for.body + +; CHECK: for.body.lver.check: +; CHECK: icmp +; CHECK: icmp +; CHECK: icmp +; CHECK: icmp +; CHECK-NOT: icmp +; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph + +; CHECK: for.body.ph.lver.orig: +; CHECK: for.body.lver.orig: +; CHECK: br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig +; CHECK: for.body.ph: +; CHECK: for.body: +; CHECK: br i1 %exitcond, label %for.end, label %for.body +; CHECK: for.end: + +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind + %loadA = load i32, i32* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind + %loadB = load i32, i32* %arrayidxB, align 4 + + %mulC = mul i32 %loadA, %loadB + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind + store i32 %mulC, i32* %arrayidxC, align 4 + + %add = add nuw nsw i64 %ind, 1 + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +}