diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5978,6 +5978,19 @@
 partial/runtime unrolling will have. See
 :ref:`Transformation Metadata <transformation-metadata>` for details.
 
+'``llvm.loop.unroll.threshold``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata suggests a default unroll threshold value for the loop. This
+metadata is used when setting the target-specific unroll options rather than
+within the core unroll pass, so it only has an effect on targets that make use
+of it. This metadata has a single integer operand specifying the threshold
+value, for example:
+
+.. code-block:: llvm
+
+       !0 = !{!"llvm.loop.unroll.threshold", i32 250}
+
 '``llvm.loop.unroll_and_jam``'
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -117,6 +117,27 @@
   const unsigned MaxAlloca = (256 - 16) * 4;
   unsigned ThresholdPrivate = UnrollThresholdPrivate;
   unsigned ThresholdLocal = UnrollThresholdLocal;
+
+  // If this loop has the llvm.loop.unroll.threshold metadata we will use the
+  // provided threshold value as the default for Threshold.
+  if (MDNode *LoopUnrollThreshold =
+          findOptionMDForLoop(L, "llvm.loop.unroll.threshold")) {
+    if (LoopUnrollThreshold->getNumOperands() == 2) {
+      ConstantInt *MetaThresholdValue = mdconst::extract_or_null<ConstantInt>(
+          LoopUnrollThreshold->getOperand(1));
+      if (MetaThresholdValue) {
+        // We will also use the supplied value for PartialThreshold for now.
+        // We may introduce additional metadata if it becomes necessary in the
+        // future.
+        UP.Threshold = MetaThresholdValue->getSExtValue();
+        UP.PartialThreshold = UP.Threshold;
+        // Never let the private/local thresholds exceed the explicit value.
+        ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);
+        ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);
+      }
+    }
+  }
+
   unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
   for (const BasicBlock *BB : L->getBlocks()) {
     const DataLayout &DL = BB->getModule()->getDataLayout();
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-threshold.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -S -mtriple=amdgcn-- -loop-unroll | FileCheck %s
+
+; Check the handling of llvm.loop.unroll.threshold metadata which can be used to
+; set the default threshold for a loop. This metadata overrides both the AMDGPU
+; default, and any value specified by the amdgpu-unroll-threshold function attribute
+; (which sets a threshold for all loops in the function).
+
+; Check that the loop in unroll_default is not fully unrolled using the default
+; unroll threshold
+; CHECK-LABEL: @unroll_default
+; CHECK: entry:
+; CHECK: br i1 %cmp
+; CHECK: ret void
+
+@in = internal unnamed_addr global i32* null, align 8
+@out = internal unnamed_addr global i32* null, align 8
+
+define void @unroll_default() {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+  %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
+  store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
+  %inc = add nsw i32 %i.0, 1
+  %cmp = icmp slt i32 %inc, 100
+  br i1 %cmp, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+; Check that the same loop in unroll_full is fully unrolled when the default
+; unroll threshold is increased by use of the llvm.loop.unroll.threshold metadata
+; CHECK-LABEL: @unroll_full
+; CHECK: entry:
+; CHECK-NOT: br i1 %cmp
+; CHECK: ret void
+
+define void @unroll_full() {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+  %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
+  store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
+  %inc = add nsw i32 %i.0, 1
+  %cmp = icmp slt i32 %inc, 100
+  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+; Check that the same loop in override_no_unroll is not unrolled when a high default
+; unroll threshold specified using the amdgpu-unroll-threshold function attribute
+; is overridden by a low threshold using the llvm.loop.unroll.threshold metadata
+
+; CHECK-LABEL: @override_no_unroll
+; CHECK: entry:
+; CHECK: br i1 %cmp
+; CHECK: ret void
+
+define void @override_no_unroll() #0 {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+  %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
+  store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
+  %inc = add nsw i32 %i.0, 1
+  %cmp = icmp slt i32 %inc, 100
+  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !3
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+; Check that the same loop in override_unroll is fully unrolled when a low default
+; unroll threshold specified using the amdgpu-unroll-threshold function attribute
+; is overridden by a high threshold using the llvm.loop.unroll.threshold metadata
+
+; CHECK-LABEL: @override_unroll
+; CHECK: entry:
+; CHECK-NOT: br i1 %cmp
+; CHECK: ret void
+
+define void @override_unroll() #1 {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+  %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
+  store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
+  %inc = add nsw i32 %i.0, 1
+  %cmp = icmp slt i32 %inc, 100
+  br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1
+
+do.end:                                           ; preds = %do.body
+  ret void
+}
+
+attributes #0 = { "amdgpu-unroll-threshold"="1000" }
+attributes #1 = { "amdgpu-unroll-threshold"="100" }
+
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.threshold", i32 1000}
+!3 = !{!3, !4}
+!4 = !{!"llvm.loop.unroll.threshold", i32 100}