Index: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
===================================================================
--- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -41,6 +41,14 @@
 void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                              TTI::UnrollingPreferences &UP) {
   UP.Runtime = UP.Partial = true;
+  // Request peeling of two iterations when the exact trip count is not a
+  // known constant but the maximum trip count is known to be small (<= 5).
+  if (SE.getSmallConstantTripCount(L) == 0 &&
+      SE.getSmallConstantMaxTripCount(L) > 0 &&
+      SE.getSmallConstantMaxTripCount(L) <= 5) {
+    UP.PeelCount = 2;
+    UP.AllowPeeling = true;
+  }
 }
 
 unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -144,6 +144,8 @@
     cl::desc("Allows loops to be peeled when the dynamic "
              "trip count is known to be low."));
 
+extern cl::opt<unsigned> UnrollForcePeelCount;
+
 static cl::opt<bool> UnrollUnrollRemainder(
   "unroll-remainder", cl::Hidden,
   cl::desc("Allow the loop remainder to be unrolled."));
@@ -795,7 +797,10 @@
   }
 
   // 4th priority is loop peeling
-  computePeelCount(L, LoopSize, UP, TripCount, SE);
+  // Only compute UP.PeelCount if the target or the user has not changed it
+  // from the default value 0, or if -unroll-force-peel-count was specified.
+  if (UP.PeelCount == 0 || UnrollForcePeelCount.getNumOccurrences() != 0)
+    computePeelCount(L, LoopSize, UP, TripCount, SE);
   if (UP.PeelCount) {
     UP.Runtime = false;
     UP.Count = 1;
Index: lib/Transforms/Utils/LoopUnrollPeel.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -58,7 +58,7 @@
     "unroll-peel-max-count", cl::init(7), cl::Hidden,
     cl::desc("Max average trip count which will cause loop peeling."));
 
-static cl::opt<unsigned> UnrollForcePeelCount(
+cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
 
@@ -476,6 +476,10 @@
   if (!canPeel(L))
     return false;
 
+  // Only try to peel innermost loops.
+  if (!L->empty())
+    return false;
+
   LoopBlocksDFS LoopBlocks(L);
   LoopBlocks.perform(LI);
 
Index: test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll
@@ -0,0 +1,38 @@
+; RUN: opt -loop-unroll -mtriple=hexagon -S < %s | FileCheck %s
+; Check that the loop is peeled twice for Hexagon.
+; CHECK: while.body.peel
+; CHECK: while.body.peel2
+
+%struct.STREAM = type { %union.anon, i32, i32 }
+%union.anon = type { i32* }
+
+define void @function(%struct.STREAM* nocapture readonly %b) local_unnamed_addr {
+entry:
+  %bitPtr3 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 2
+  %0 = load i32, i32* %bitPtr3, align 4
+  %cmp11 = icmp ult i32 %0, 32
+  br i1 %cmp11, label %while.body.preheader, label %do.end
+
+while.body.preheader:
+  %value2 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 1
+  %1 = load i32, i32* %value2, align 4
+  %w = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 0, i32 0
+  %2 = load i32*, i32** %w, align 4
+  br label %while.body
+
+while.body:
+  %bitPtr.014 = phi i32 [ %add, %while.body ], [ %0, %while.body.preheader ]
+  %value.013 = phi i32 [ %shl, %while.body ], [ %1, %while.body.preheader ]
+  %ptr.012 = phi i32* [ %incdec.ptr, %while.body ], [ %2, %while.body.preheader ]
+  %add = add nuw i32 %bitPtr.014, 8
+  %shr = lshr i32 %value.013, 24
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.012, i32 1
+  store i32 %shr, i32* %ptr.012, align 4
+  %shl = shl i32 %value.013, 8
+  %cmp = icmp ult i32 %add, 17
+  br i1 %cmp, label %while.body, label %do.end
+
+do.end:
+  ret void
+}
+