diff --git a/llvm/test/Transforms/LoopUnroll/FullUnroll.ll b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll --- a/llvm/test/Transforms/LoopUnroll/FullUnroll.ll +++ b/llvm/test/Transforms/LoopUnroll/FullUnroll.ll @@ -1,70 +1,41 @@ -; RUN: opt -passes='default' -disable-verify --mtriple x86_64-pc-linux-gnu -disable-loop-unrolling=true \ -; RUN: -S -o - %s | FileCheck %s +; RUN: opt -passes='loop-unroll-full' -disable-verify --mtriple x86_64-pc-linux-gnu -S -o - %s | FileCheck %s ; This checks that the loop full unroller will fire in the new pass manager ; when forced via #pragma in the source (or annotation in the code). -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" -; We don't end up deleting the loop, merely turning it infinite, but we remove -; everything inside of it so check for the loop structure and absence of -; conditional branches. -; CHECK-LABEL: bb -; CHECK: br label -; CHECK-NOT: br i1 -; CHECK: br label +; Completely unroll the inner loop +; CHECK-LABEL: @foo +; CHECK: br i1 ; CHECK-NOT: br i1 ; Function Attrs: noinline nounwind optnone uwtable -define void @foo() #0 { +define void @foo() local_unnamed_addr #0 { bb: %tmp = alloca [5 x i32*], align 16 - %tmp1 = alloca i32, align 4 - %tmp2 = alloca i32, align 4 - store i32 5, i32* %tmp1, align 4 - br label %bb3 - -bb3: ; preds = %bb23, %bb - %tmp4 = load i32, i32* %tmp1, align 4 - %tmp5 = icmp ne i32 %tmp4, 0 - br i1 %tmp5, label %bb6, label %bb24 + br label %bb7.preheader -bb6: ; preds = %bb3 - store i32 0, i32* %tmp2, align 4 - br label %bb7 +bb3.loopexit: ; preds = %bb10 + %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ] + %tmp5.not = icmp eq i32 %spec.select.lcssa, 0 + br i1 %tmp5.not, label %bb24, label %bb7.preheader -bb7: ; preds = %bb20, %bb6 - %tmp8 = load i32, i32* %tmp2, align 4 - %tmp9 = icmp slt i32 %tmp8, 5 - br i1 %tmp9, label %bb10, label %bb23 +bb7.preheader: ; preds = %bb3.loopexit, %bb + %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ] + br label %bb10 -bb10: ; preds = %bb7 - %tmp11 = load i32, i32* %tmp2, align 4 - %tmp12 = sext i32 %tmp11 to i64 - %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %tmp12 +bb10: ; preds = %bb10, %bb7.preheader + %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] + %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] + %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv %tmp14 = load i32*, i32** %tmp13, align 8 - %tmp15 = icmp ne i32* %tmp14, null - br i1 %tmp15, label %bb16, label %bb19 - -bb16: ; preds = %bb10 - %tmp17 = load i32, i32* %tmp1, align 4 - %tmp18 = add nsw i32 %tmp17, -1 - store i32 %tmp18, i32* %tmp1, align 4 - br label %bb19 - -bb19: ; preds = %bb16, %bb10 - br label %bb20 - -bb20: ; preds = %bb19 - %tmp21 = load i32, i32* %tmp2, align 4 - %tmp22 = add nsw i32 %tmp21, 1 - store i32 %tmp22, i32* %tmp2, align 4 - br label %bb7, !llvm.loop !1 - -bb23: ; preds = %bb7 - br label %bb3 - -bb24: ; preds = %bb3 + %tmp15.not = icmp ne i32* %tmp14, null + %tmp18 = sext i1 %tmp15.not to i32 + %spec.select = add nsw i32 %tmp1.14, %tmp18 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 5 + br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !1 + +bb24: ; preds = %bb3.loopexit ret void }