Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -490,6 +490,8 @@ bool UpperBound; /// Allow peeling off loop iterations. bool AllowPeeling; + /// Allow peeling off loop iterations for loop nests. + bool AllowLoopNestsPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -154,6 +154,10 @@ cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +static cl::opt<bool> UnrollAllowLoopNestsPeeling( + "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt<bool> UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -204,6 +208,7 @@ UP.Force = false; UP.UpperBound = false; UP.AllowPeeling = true; + UP.AllowLoopNestsPeeling = false; UP.UnrollAndJam = false; UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; @@ -243,6 +248,8 @@ UP.UpperBound = false; if (UnrollAllowPeeling.getNumOccurrences() > 0) UP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; if (UnrollUnrollRemainder.getNumOccurrences() > 0) UP.UnrollRemainder = UnrollUnrollRemainder; Index: llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -290,7 +290,7 @@ 
return; // Only try to peel innermost loops. - if (!L->empty()) + if (!UP.AllowLoopNestsPeeling && !L->empty()) return; // If the user provided a peel count, use that. Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix LOOP-NEST declare void @f1() declare void @f2() @@ -433,6 +434,88 @@ ; CHECK: for.end: ; CHECK-NEXT: ret void ; +; LOOP-NEST-LABEL: @test5( +; LOOP-NEST-NEXT: for.body.lr.ph: +; LOOP-NEST-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]] +; LOOP-NEST: outer.header.peel.begin: +; LOOP-NEST-NEXT: br label [[OUTER_HEADER_PEEL:%.*]] +; LOOP-NEST: outer.header.peel: +; LOOP-NEST-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; LOOP-NEST: for.body.peel: +; LOOP-NEST-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ] +; LOOP-NEST-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; LOOP-NEST-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; LOOP-NEST: if.else.peel: +; LOOP-NEST-NEXT: call void @f2() +; LOOP-NEST-NEXT: br label [[FOR_INC_PEEL]] +; LOOP-NEST: if.then.peel: +; LOOP-NEST-NEXT: call void @f1() +; LOOP-NEST-NEXT: br label [[FOR_INC_PEEL]] +; LOOP-NEST: for.inc.peel: +; LOOP-NEST-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1 +; LOOP-NEST-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; LOOP-NEST-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]] +; LOOP-NEST: outer.inc.peel: +; LOOP-NEST-NEXT: [[J_INC_PEEL:%.*]] = add 
nsw i32 0, 1 +; LOOP-NEST-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]] +; LOOP-NEST-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]] +; Verify that MD_loop metadata is dropped. +; LOOP-NEST-NOT: , !llvm.loop !{{[0-9]*}} +; LOOP-NEST: outer.header.peel.next: +; LOOP-NEST-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]] +; LOOP-NEST: outer.header.peel2: +; LOOP-NEST-NEXT: br label [[FOR_BODY_PEEL3:%.*]] +; LOOP-NEST: for.body.peel3: +; LOOP-NEST-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ] +; LOOP-NEST-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2 +; LOOP-NEST-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]] +; LOOP-NEST: if.else.peel6: +; LOOP-NEST-NEXT: call void @f2() +; LOOP-NEST-NEXT: br label [[FOR_INC_PEEL8]] +; LOOP-NEST: if.then.peel7: +; LOOP-NEST-NEXT: call void @f1() +; LOOP-NEST-NEXT: br label [[FOR_INC_PEEL8]] +; LOOP-NEST: for.inc.peel8: +; LOOP-NEST-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1 +; LOOP-NEST-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]] +; LOOP-NEST-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]] +; LOOP-NEST: outer.inc.peel11: +; LOOP-NEST-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1 +; LOOP-NEST-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]] +; LOOP-NEST-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]] +; Verify that MD_loop metadata is dropped. 
+; LOOP-NEST-NOT: , !llvm.loop !{{[0-9]*}} +; LOOP-NEST: outer.header.peel.next1: +; LOOP-NEST-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]] +; LOOP-NEST: outer.header.peel.next14: +; LOOP-NEST-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; LOOP-NEST: for.body.lr.ph.peel.newph: +; LOOP-NEST-NEXT: br label [[OUTER_HEADER:%.*]] +; LOOP-NEST: outer.header: +; LOOP-NEST-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] +; LOOP-NEST-NEXT: br label [[FOR_BODY:%.*]] +; LOOP-NEST: for.body: +; LOOP-NEST-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; LOOP-NEST-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; LOOP-NEST: if.then: +; LOOP-NEST-NEXT: call void @f1() +; LOOP-NEST-NEXT: br label [[FOR_INC]] +; LOOP-NEST: if.else: +; LOOP-NEST-NEXT: call void @f2() +; LOOP-NEST-NEXT: br label [[FOR_INC]] +; LOOP-NEST: for.inc: +; LOOP-NEST-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; LOOP-NEST-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; LOOP-NEST-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] +; LOOP-NEST: outer.inc: +; LOOP-NEST-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +; LOOP-NEST-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] +; LOOP-NEST-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}} +; LOOP-NEST: for.end.loopexit: +; LOOP-NEST-NEXT: br label [[FOR_END]] +; LOOP-NEST: for.end: +; LOOP-NEST-NEXT: ret void +; for.body.lr.ph: br label %outer.header