Honor forced full unrolling in the new pass manager when unrolling is disabled.

LoopFullUnrollPass is now scheduled whenever the ThinLTO/sample-PGO condition
allows it, with OnlyWhenForced set to !PTO.LoopUnrolling, instead of being
dropped from the pipeline when PTO.LoopUnrolling is false. opt gains a
-new-pm-disable-loop-unrolling flag that feeds PTO.LoopUnrolling, and tests
cover both the clang driver and the opt pipeline.

diff --git a/clang/test/Misc/loop-opt-setup.c b/clang/test/Misc/loop-opt-setup.c
--- a/clang/test/Misc/loop-opt-setup.c
+++ b/clang/test/Misc/loop-opt-setup.c
@@ -1,5 +1,5 @@
-// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
-// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefixes=CHECK,CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefixes=CHECK,CHECK-OLDPM
 extern int a[16];
 int b = 0;
 int foo(void) {
@@ -8,5 +8,34 @@
     a[i] = b += 2;
   return b;
 }
+// Check br i1 to make sure that the loop is fully unrolled
 // CHECK-NOT: br i1
 
+inline void Helper() {
+  const int *nodes[5];
+  int num_active = 5;
+
+  while (num_active) {
+#pragma clang loop unroll(full)
+    for (int i = 0; i < 5; ++i) {
+      if (nodes[i]) {
+        --num_active;
+      }
+    }
+  }
+}
+
+void Run() {
+  Helper();
+}
+
+// Check br i1 to make sure the loop is gone, there will still be a label branch for the infinite loop.
+// CHECK-NEWPM-NOT: br i1
+
+// The old pass manager doesn't remove the loop so check for 5 load i32*.
+// CHECK-OLDPM: Helper
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -503,12 +503,13 @@
   LPM2.addPass(LoopDeletionPass());
   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
   // because it changes IR to makes profile annotation in back compile
-  // inaccurate.
-  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
-       PGOOpt->Action != PGOOptions::SampleUse) &&
-      PTO.LoopUnrolling)
+  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+  // attributes, so always schedule the full unroll pass and let it honor them
+  // (restricted to forced loops when general unrolling is disabled).
+  if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+      PGOOpt->Action != PGOOptions::SampleUse)
     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
-                                    /*OnlyWhenForced=*/false,
+                                    /*OnlyWhenForced=*/!PTO.LoopUnrolling,
                                     PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
diff --git a/llvm/test/Transforms/LoopUnroll/FullUnroll.ll b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll
@@ -0,0 +1,73 @@
+; RUN: opt -passes='default<O1>' -disable-verify --mtriple x86_64-pc-linux-gnu -new-pm-disable-loop-unrolling=true \
+; RUN: -S -o - %s | FileCheck %s
+
+; This checks that the loop full unroller will fire in the new pass manager
+; when forced via #pragma in the source (or annotation in the code).
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define void @walrus() #0 {
+; We don't end up deleting the loop, merely turning it infinite, but we remove
+; everything inside of it so checking for a conditional branch will work.
+; CHECK-LABEL: entry
+; CHECK-NOT: br i1
+entry:
+  %nodes = alloca [5 x i32*], align 16
+  %num_active = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 5, i32* %num_active, align 4
+  br label %while.cond
+
+while.cond:                                       ; preds = %for.end, %entry
+  %0 = load i32, i32* %num_active, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %while.body
+  %1 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %1, 5
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [5 x i32*], [5 x i32*]* %nodes, i64 0, i64 %idxprom
+  %3 = load i32*, i32** %arrayidx, align 8
+  %tobool1 = icmp ne i32* %3, null
+  br i1 %tobool1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %num_active, align 4
+  %dec = add nsw i32 %4, -1
+  store i32 %dec, i32* %num_active, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %5 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !1
+
+for.end:                                          ; preds = %for.cond
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+attributes #0 = { noinline nounwind uwtable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.full"}
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -100,6 +100,11 @@
              "the OptimizerLast extension point into default pipelines"),
     cl::Hidden);
 
+// Individual pipeline tuning options.
+static cl::opt<bool> DisableLoopUnrolling(
+    "new-pm-disable-loop-unrolling",
+    cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false));
+
 extern cl::opt<PGOKind> PGOKindFlag;
 extern cl::opt<std::string> ProfileFile;
 extern cl::opt<CSPGOKind> CSPGOKindFlag;
@@ -260,6 +265,10 @@
   SI.registerCallbacks(PIC);
 
   PipelineTuningOptions PTO;
+  // PTO.LoopUnrolling defaults to true and DisableLoopUnrolling defaults to
+  // false, so this assignment is a no-op unless the flag was passed; there is
+  // no need to check whether the option was explicitly set.
+  PTO.LoopUnrolling = !DisableLoopUnrolling;
   PTO.Coroutines = Coroutines;
   PassBuilder PB(TM, PTO, P, &PIC);
   registerEPCallbacks(PB, VerifyEachPass, DebugPM);