diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -654,7 +654,7 @@ const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, + Expander.isHighCostExpansion(TripCountSC, L, 2 * SCEVCheapExpansionBudget, TTI, PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; diff --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll --- a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll @@ -11,12 +11,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: @@ -51,12 +85,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add nuw nsw i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add nuw nsw i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add nuw nsw i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add nuw nsw i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add nuw nsw i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add nuw nsw i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add nuw nsw i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: diff --git a/llvm/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll b/llvm/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll --- a/llvm/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ b/llvm/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -1,12 +1,15 @@ -; RUN: opt -S -unroll-runtime -passes=loop-unroll < %s | FileCheck %s +; RUN: opt -S -unroll-runtime -passes=loop-unroll -scev-cheap-expansion-budget=1 < %s | FileCheck %s --check-prefixes=CHECK +; RUN: opt -S -unroll-runtime -passes=loop-unroll -scev-cheap-expansion-budget=8192 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-HIGH-BUDGET +; RUN: opt -S -unroll-runtime -passes=loop-unroll < %s | FileCheck %s --check-prefixes=CHECK target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -;; Check that we don't emit expensive instructions to compute trip +;; Check that we generally don't emit expensive instructions to compute trip ;; counts when unrolling loops. define i32 @test(i64 %v12, ptr %array, ptr %loc) { ; CHECK-LABEL: @test( +; CHECK-HIGH-BUDGET: udiv ; CHECK-NOT: udiv entry: %step = load i64, ptr %loc, !range !0