Make the new pass manager's full loop unroller honor forced unroll requests.

PassBuilder.cpp: LoopFullUnrollPass is no longer skipped when
PTO.LoopUnrolling is false; it is added with OnlyWhenForced set to
!PTO.LoopUnrolling instead, so loops carrying a forced full-unroll annotation
are still unrolled. The ThinLTO pre-link + sample PGO exclusion is unchanged.

NewPMDriver.cpp: add the opt flag -new-pm-disable-loop-unrolling, which clears
PipelineTuningOptions::LoopUnrolling.

Tests: clang/test/Misc/loop-opt-setup.c now checks the new and old pass
managers under separate FileCheck prefixes (directives are spelled
<prefix>-LABEL / <prefix>-NOT so that FileCheck actually evaluates them), and
llvm/test/Transforms/LoopUnroll/FullUnroll.ll covers the opt flag.

NOTE(review): the pipeline in FullUnroll.ll's RUN line is written as
default<O1>; confirm the optimization level against the original change.

diff --git a/clang/test/Misc/loop-opt-setup.c b/clang/test/Misc/loop-opt-setup.c
--- a/clang/test/Misc/loop-opt-setup.c
+++ b/clang/test/Misc/loop-opt-setup.c
@@ -1,5 +1,5 @@
-// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
-// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-OLDPM
 extern int a[16];
 int b = 0;
 int foo(void) {
@@ -8,5 +8,35 @@
     a[i] = b += 2;
   return b;
 }
-// CHECK-NOT: br i1
+// Check br i1 to make sure that the loop is fully unrolled
+// CHECK-NEWPM-LABEL: foo
+// CHECK-NEWPM-NOT: br i1
+// CHECK-OLDPM-LABEL: foo
+// CHECK-OLDPM-NOT: br i1
 
+void Helper() {
+  const int *nodes[5];
+  int num_active = 5;
+
+  while (num_active)
+#pragma clang loop unroll(full)
+    for (int i = 0; i < 5; ++i)
+      if (nodes[i])
+        --num_active;
+}
+
+// Check br i1 to make sure the loop is gone, there will still be a label branch for the infinite loop.
+// CHECK-NEWPM-LABEL: Helper
+// CHECK-NEWPM: br label
+// CHECK-NEWPM-NOT: br i1
+// CHECK-NEWPM: br label
+
+// The old pass manager doesn't remove the while loop so check for 5 load i32*.
+// CHECK-OLDPM-LABEL: Helper
+// CHECK-OLDPM: br label
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: ret
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -519,12 +519,13 @@
   LPM2.addPass(LoopDeletionPass());
   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
   // because it changes IR to makes profile annotation in back compile
-  // inaccurate.
-  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
-       PGOOpt->Action != PGOOptions::SampleUse) &&
-      PTO.LoopUnrolling)
+  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+  // attributes, so add the full unroll pass even when PTO.LoopUnrolling is off
+  // and restrict it to forced unrolling in that case.
+  if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+      PGOOpt->Action != PGOOptions::SampleUse)
     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
-                                    /*OnlyWhenForced=*/false,
+                                    /*OnlyWhenForced=*/!PTO.LoopUnrolling,
                                     PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
diff --git a/llvm/test/Transforms/LoopUnroll/FullUnroll.ll b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll
@@ -0,0 +1,77 @@
+; RUN: opt -passes='default<O1>' -disable-verify --mtriple x86_64-pc-linux-gnu -new-pm-disable-loop-unrolling=true \
+; RUN: -S -o - %s | FileCheck %s
+
+; This checks that the loop full unroller will fire in the new pass manager
+; when forced via #pragma in the source (or annotation in the code).
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We don't end up deleting the loop, merely turning it infinite, but we remove
+; everything inside of it so check for the loop structure and absence of
+; conditional branches.
+; CHECK-LABEL: bb
+; CHECK: br label
+; CHECK-NOT: br i1
+; CHECK: br label
+; CHECK-NOT: br i1
+
+; Function Attrs: noinline nounwind optnone uwtable
+define void @foo() #0 {
+bb:
+  %tmp = alloca [5 x i32*], align 16
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i32, align 4
+  store i32 5, i32* %tmp1, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb23, %bb
+  %tmp4 = load i32, i32* %tmp1, align 4
+  %tmp5 = icmp ne i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb24
+
+bb6:                                              ; preds = %bb3
+  store i32 0, i32* %tmp2, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb20, %bb6
+  %tmp8 = load i32, i32* %tmp2, align 4
+  %tmp9 = icmp slt i32 %tmp8, 5
+  br i1 %tmp9, label %bb10, label %bb23
+
+bb10:                                             ; preds = %bb7
+  %tmp11 = load i32, i32* %tmp2, align 4
+  %tmp12 = sext i32 %tmp11 to i64
+  %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %tmp12
+  %tmp14 = load i32*, i32** %tmp13, align 8
+  %tmp15 = icmp ne i32* %tmp14, null
+  br i1 %tmp15, label %bb16, label %bb19
+
+bb16:                                             ; preds = %bb10
+  %tmp17 = load i32, i32* %tmp1, align 4
+  %tmp18 = add nsw i32 %tmp17, -1
+  store i32 %tmp18, i32* %tmp1, align 4
+  br label %bb19
+
+bb19:                                             ; preds = %bb16, %bb10
+  br label %bb20
+
+bb20:                                             ; preds = %bb19
+  %tmp21 = load i32, i32* %tmp2, align 4
+  %tmp22 = add nsw i32 %tmp21, 1
+  store i32 %tmp22, i32* %tmp2, align 4
+  br label %bb7, !llvm.loop !1
+
+bb23:                                             ; preds = %bb7
+  br label %bb3
+
+bb24:                                             ; preds = %bb3
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone uwtable }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.full"}
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -100,6 +100,11 @@
              "the OptimizerLast extension point into default pipelines"),
     cl::Hidden);
 
+// Individual pipeline tuning options.
+static cl::opt<bool> DisableLoopUnrolling(
+    "new-pm-disable-loop-unrolling",
+    cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false));
+
 extern cl::opt<PGOKind> PGOKindFlag;
 extern cl::opt<std::string> ProfileFile;
 extern cl::opt<CSPGOKind> CSPGOKindFlag;
@@ -260,6 +265,10 @@
   SI.registerCallbacks(PIC);
 
   PipelineTuningOptions PTO;
+  // LoopUnrolling defaults to true and DisableLoopUnrolling is initialized to
+  // false above, so the assignment can be unconditional: it only changes the
+  // tuning options when -new-pm-disable-loop-unrolling is set.
+  PTO.LoopUnrolling = !DisableLoopUnrolling;
   PTO.Coroutines = Coroutines;
   PassBuilder PB(TM, PTO, P, &PIC);
   registerEPCallbacks(PB, VerifyEachPass, DebugPM);