diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -184,7 +184,8 @@ int Count = -1, int AllowPartial = -1, int Runtime = -1, int UpperBound = -1, int AllowPeeling = -1); -// Create an unrolling pass for full unrolling that uses exact trip count only. +// Create an unrolling pass for full unrolling that uses exact trip count only +// and also does peeling. Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -22,7 +22,7 @@ class Loop; class LPMUpdater; -/// Loop unroll pass that only does full loop unrolling. +/// Loop unroll pass that only does full loop unrolling and peeling. class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> { const int OptLevel; diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -453,7 +453,7 @@ MPM.add(createLoopSimplifyCFGPass()); } - // Unroll small loops + // Unroll small loops and perform peeling. MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); addExtensionsToPM(EP_LoopOptimizerEnd, MPM); @@ -1046,7 +1046,7 @@ if (EnableLoopFlatten) PM.add(createLoopFlattenPass()); - // Unroll small loops + // Unroll small loops and perform peeling. 
PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); PM.add(createLoopVectorizePass(true, !LoopVectorize)); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1299,7 +1299,7 @@ Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV) { return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1, - 0, 0, 0, 0); + 0, 0, 0, 1); } PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1327,7 +1327,7 @@ OnlyWhenForced, ForgetSCEV, /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ false, + /*AllowPeeling*/ true, /*AllowProfileBasedPeeling*/ false, /*FullUnrollMaxCount*/ None) != LoopUnrollResult::Unmodified; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/peel-before-lv-to-enable-vectorization.ll @@ -11,7 +11,9 @@ define i32 @test(i32* readonly %p, i32* readnone %q) { ; CHECK-LABEL: define i32 @test( -; CHECK-NOT: vector.body +; CHECK: vector.body: +; CHECK: %index.next = add i64 %index, 8 +; CHECK: middle.block: ; entry: %cmp.not7 = icmp eq i32* %p, %q