Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1568,6 +1568,10 @@ // Enable partial unrolling and runtime unrolling. BaseT::getUnrollingPreferences(L, SE, UP); + // Enable Upper bound unrolling universally, not dependent upon the conditions + // below. + UP.UpperBound = true; + // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try // a larger threshold to unroll more loops. @@ -1608,7 +1612,6 @@ !ST->getSchedModel().isOutOfOrder()) { UP.Runtime = true; UP.Partial = true; - UP.UpperBound = true; UP.UnrollRemainder = true; UP.DefaultUnrollRuntimeCount = 4; Index: llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 | FileCheck %s + +define void @test(i1 %cond) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: switch i32 0, label [[FOR_BODY_IF_END6_CRIT_EDGE:%.*]] [ +; CHECK-NEXT: i32 2, label [[IF_THEN4:%.*]] +; CHECK-NEXT: ] +; CHECK: for.body.if.end6_crit_edge: +; CHECK-NEXT: unreachable +; CHECK: if.then4: +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_BODY_1:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; CHECK: for.body.1: +; CHECK-NEXT: switch i32 1, label [[FOR_BODY_IF_END6_CRIT_EDGE]] [ +; CHECK-NEXT: i32 2, label [[IF_THEN4_1:%.*]] +; CHECK-NEXT: ] +; CHECK: if.then4.1: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_END]], label [[FOR_BODY_2:%.*]] +; CHECK: for.body.2: +; 
CHECK-NEXT: switch i32 2, label [[FOR_BODY_IF_END6_CRIT_EDGE]] [ +; CHECK-NEXT: i32 2, label [[IF_THEN4_2:%.*]] +; CHECK-NEXT: ] +; CHECK: if.then4.2: +; CHECK-NEXT: br label [[FOR_END]] +; +entry: + %0 = select i1 %cond, i32 2, i32 3 + br label %for.body + +for.body: ; preds = %if.then4, %entry + %i.017 = phi i32 [ 0, %entry ], [ %inc, %if.then4 ] + switch i32 %i.017, label %for.body.if.end6_crit_edge [ + i32 2, label %if.then4 + ] + +for.body.if.end6_crit_edge: ; preds = %for.body + unreachable + +if.then4: ; preds = %for.body + %inc = add nuw nsw i32 %i.017, 1 + %exitcond.not = icmp eq i32 %inc, %0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %if.then4 + ret void +}