Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -4279,6 +4279,18 @@ !0 = !{!"llvm.loop.unroll.runtime.disable"} +'``llvm.loop.unroll.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata suggests that the loop should be fully unrolled if the trip count +is known at compile time and partially unrolled if the trip count is not known +at compile time. The metadata has a single operand which is the string +``llvm.loop.unroll.enable``. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.unroll.enable"} + '``llvm.loop.unroll.full``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -186,7 +186,7 @@ // total unrolled size. Parameters Threshold and PartialThreshold // are set to the maximum unrolled size for fully and partially // unrolled loops respectively. - void selectThresholds(const Loop *L, bool HasPragma, + void selectThresholds(const Loop *L, bool UsePragmaThreshold, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, unsigned &PercentDynamicCostSavedThreshold, @@ -212,7 +212,7 @@ Threshold = UP.OptSizeThreshold; PartialThreshold = UP.PartialOptSizeThreshold; } - if (HasPragma) { + if (UsePragmaThreshold) { // If the loop has an unrolling pragma, we want to be more // aggressive with unrolling limits. Set thresholds to at // least the PragmaTheshold value which is larger than the @@ -689,6 +689,12 @@ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); } +// Returns true if the loop has an unroll(enable) pragma. This metadata is used +// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. 
+static bool HasUnrollEnablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); +} + // Returns true if the loop has an unroll(disable) pragma. static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); @@ -863,8 +869,9 @@ return false; } bool PragmaFullUnroll = HasUnrollFullPragma(L); + bool PragmaEnableUnroll = HasUnrollEnablePragma(L); unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaCount > 0; + bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; TargetTransformInfo::UnrollingPreferences UP; getUnrollingPreferences(L, TTI, UP); @@ -912,7 +919,15 @@ unsigned Threshold, PartialThreshold; unsigned PercentDynamicCostSavedThreshold; unsigned DynamicCostSavingsDiscount; - selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, + // Only use the high pragma threshold when we have a target unroll factor, + // i.e. an explicit count ("#pragma unroll N") or an unroll(full) or + // unroll(enable) pragma on a loop whose trip count is known. Otherwise we + // rely on the standard threshold to heuristically select a reasonable count. + bool UsePragmaThreshold = + PragmaCount > 0 || + ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); + + selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount); @@ -946,14 +961,15 @@ // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = - (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || + (UserRuntime ? CurrentRuntime : UP.Runtime); // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = UserAllowPartial ? 
CurrentAllowPartial : UP.Partial; + bool AllowPartial = PragmaEnableUnroll || + (UserAllowPartial ? CurrentAllowPartial : UP.Partial); if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); @@ -993,23 +1009,27 @@ DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); - if (PragmaFullUnroll && PragmaCount == 0) { - if (TripCount && Count != TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because unrolled size is too large."); - } else if (!TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); - } - } else if (PragmaCount > 0 && Count != OriginalCount) { + if ((PragmaCount > 0) && Count != OriginalCount) { emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to unroll loop the number of times directed by " "unroll_count pragma because unrolled size is too large."); + } else if (PragmaFullUnroll && !TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(full) pragma " + "because loop has a runtime trip count."); + } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop as directed by unroll(enable) pragma because " + "unrolled size is too large."); + } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + Count != TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll pragma because " + "unrolled size is too large."); } } Index: test/Transforms/LoopUnroll/unroll-pragmas.ll 
=================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -256,3 +256,69 @@ ret void } !12 = !{!12, !4} + +; #pragma clang loop unroll(enable) +; Loop should be fully unrolled. +; +; CHECK-LABEL: @loop64_with_enable( +; CHECK-NOT: br i1 +define void @loop64_with_enable(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13 + +for.end: ; preds = %for.body + ret void +} +!13 = !{!13, !14} +!14 = !{!"llvm.loop.unroll.enable"} + +; #pragma clang loop unroll(enable) +; Loop has a runtime trip count and should be runtime unrolled and duplicated +; (original and 8x). 
+; +; CHECK-LABEL: @runtime_loop_with_enable( +; CHECK: for.body.prol: +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +; CHECK: for.body: +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK: store i32 +; CHECK-NOT: store i32 +; CHECK: br i1 +define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + +for.end: ; preds = %for.body, %entry + ret void +} +!15 = !{!15, !14}