Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -96,6 +96,10 @@
     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                      cl::desc("Disable memory promotion in LICM pass"));
 
+static cl::opt<unsigned> MaxNumPromotions(
+    "licm-max-num-promotions", cl::Hidden, cl::init(0),
+    cl::desc("The maximum number of instructions to be hoisted per loop"));
+
 static cl::opt<bool> ControlFlowHoisting(
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
@@ -328,6 +332,29 @@
   }
 }
 
+/// Cap on the number of instructions the hoisting walk may hoist out of the
+/// loop currently being processed. Recomputed for every loop so that a
+/// register count derived from one function's target is never reused for
+/// another. NOTE: this is still file-level state, exactly like the option.
+static unsigned HoistLimit = 0;
+
+/// Set HoistLimit from -licm-max-num-promotions, falling back to the register
+/// count reported by \p TTI when the option is 0. The option is never written.
+static void SetMaxNumPromotions(TargetTransformInfo *TTI) {
+  // TODO: if the option is 0 (its default, i.e. not user specified), we set
+  // the maximum to the number of available scalar registers. This is a very
+  // first step to not let the register pressure explode for some cases.
+  // For now, some instructions are not considered for hoisting when
+  // the maximum is reached, but better would be to come up with a
+  // register pressure estimator.
+  HoistLimit = MaxNumPromotions;
+  if (!HoistLimit)
+    HoistLimit =
+        TTI->getNumberOfRegisters(TTI->getRegisterClassForType(false));
+  LLVM_DEBUG(dbgs() << "LICM: Using promotion maximum: " << HoistLimit
+                    << "\n");
+}
+
 /// Hoist expressions out of the specified loop. Note, alias info for inner
 /// loop is not preserved so it is not a good idea to run LICM multiple
 /// times on one loop.
@@ -345,6 +372,8 @@
     return false;
   }
 
+  SetMaxNumPromotions(TTI);
+
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   std::unique_ptr<SinkAndHoistLICMFlags> Flags;
@@ -838,6 +867,7 @@
   LoopBlocksRPO Worklist(CurLoop);
   Worklist.perform(LI);
   bool Changed = false;
+  unsigned Hoisted = 0;
   for (BasicBlock *BB : Worklist) {
     // Only need to process the contents of this block if it is not part of a
     // subloop (which would already have been processed).
@@ -863,6 +893,11 @@
         continue;
       }
 
+      if (Hoisted >= HoistLimit) {
+        LLVM_DEBUG(dbgs() << "LICM: maximum reached, not hoisting: "; I.dump());
+        continue;
+      }
+
       // Try hoisting the instruction out to the preheader. We can only do
       // this if all of the operands of the instruction are loop invariant and
       // if it is safe to hoist the instruction. We also check block frequency
@@ -881,6 +916,7 @@
               MSSAU, SE, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -907,6 +943,7 @@
               SafetyInfo, MSSAU, SE, ORE);
         HoistedInstructions.push_back(ReciprocalDivisor);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -926,6 +963,7 @@
               MSSAU, SE, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -940,6 +978,7 @@
                 MSSAU, SE, ORE);
           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
           Changed = true;
+          Hoisted++;
           continue;
         }
       }
Index: llvm/test/Transforms/LICM/hoist-round.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-round.ll
+++ llvm/test/Transforms/LICM/hoist-round.ll
@@ -1,6 +1,8 @@
-; RUN: opt -S -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
-; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=DEFAULT
+; RUN: opt -S -licm -licm-max-num-promotions=14 < %s | FileCheck %s --check-prefix=MAX
+; RUN: opt -licm-max-num-promotions=14 -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s --check-prefix=MAX
+; RUN: opt -S -licm -licm-max-num-promotions=14 -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=MAX
 
 target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"
 
@@ -8,22 +10,63 @@
 ; copysign, minnum, maxnum, minimum, maximum, and fabs intrinsics are
 ; considered safe to speculate.
 
-; CHECK-LABEL: @test
-; CHECK: call float @llvm.ceil.f32
-; CHECK: call float @llvm.floor.f32
-; CHECK: call float @llvm.nearbyint.f32
-; CHECK: call float @llvm.rint.f32
-; CHECK: call float @llvm.round.f32
-; CHECK: call float @llvm.trunc.f32
-; CHECK: call float @llvm.fabs.f32
-; CHECK: call float @llvm.copysign.f32
-; CHECK: call float @llvm.minnum.f32
-; CHECK: call float @llvm.maxnum.f32
-; CHECK: call float @llvm.powi.f32
-; CHECK: call float @llvm.roundeven.f32
-; CHECK: for.body:
-
 define void @test(float %arg1, float %arg2) {
+; DEFAULT-LABEL: @test(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP_1:%.*]] = call float @llvm.ceil.f32(float [[ARG1:%.*]])
+; DEFAULT-NEXT: [[TMP_2:%.*]] = call float @llvm.floor.f32(float [[TMP_1]])
+; DEFAULT-NEXT: [[TMP_3:%.*]] = call float @llvm.nearbyint.f32(float [[TMP_2]])
+; DEFAULT-NEXT: [[TMP_4:%.*]] = call float @llvm.rint.f32(float [[TMP_3]])
+; DEFAULT-NEXT: [[TMP_5:%.*]] = call float @llvm.round.f32(float [[TMP_4]])
+; DEFAULT-NEXT: [[TMP_6:%.*]] = call float @llvm.trunc.f32(float [[TMP_5]])
+; DEFAULT-NEXT: [[TMP_7:%.*]] = call float @llvm.fabs.f32(float [[TMP_6]])
+; DEFAULT-NEXT: [[TMP_8:%.*]] = call float @llvm.copysign.f32(float [[TMP_7]], float [[ARG2:%.*]])
+; DEFAULT-NEXT: br label [[FOR_HEAD:%.*]]
+; DEFAULT: for.head:
+; DEFAULT-NEXT: [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IND_NEW:%.*]], [[FOR_BODY:%.*]] ]
+; DEFAULT-NEXT: [[CMP:%.*]] = icmp slt i32 [[IND]], 10
+; DEFAULT-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; DEFAULT: for.body:
+; DEFAULT-NEXT: [[TMP_9:%.*]] = call float @llvm.minnum.f32(float [[TMP_8]], float [[ARG2]])
+; DEFAULT-NEXT: [[TMP_10:%.*]] = call float @llvm.maxnum.f32(float [[TMP_9]], float [[ARG2]])
+; DEFAULT-NEXT: [[TMP_11:%.*]] = call float @llvm.minimum.f32(float [[TMP_10]], float [[ARG2]])
+; DEFAULT-NEXT: [[TMP_12:%.*]] = call float @llvm.maximum.f32(float [[TMP_11]], float [[ARG2]])
+; DEFAULT-NEXT: [[TMP_13:%.*]] = call float @llvm.powi.f32(float [[TMP_12]], i32 4)
+; DEFAULT-NEXT: [[TMP_14:%.*]] = call float @llvm.roundeven.f32(float [[TMP_13]])
+; DEFAULT-NEXT: call void @consume(float [[TMP_14]])
+; DEFAULT-NEXT: [[IND_NEW]] = add i32 [[IND]], 1
+; DEFAULT-NEXT: br label [[FOR_HEAD]]
+; DEFAULT: exit:
+; DEFAULT-NEXT: ret void
+;
+; MAX-LABEL: @test(
+; MAX-NEXT: entry:
+; MAX-NEXT: [[TMP_1:%.*]] = call float @llvm.ceil.f32(float [[ARG1:%.*]])
+; MAX-NEXT: [[TMP_2:%.*]] = call float @llvm.floor.f32(float [[TMP_1]])
+; MAX-NEXT: [[TMP_3:%.*]] = call float @llvm.nearbyint.f32(float [[TMP_2]])
+; MAX-NEXT: [[TMP_4:%.*]] = call float @llvm.rint.f32(float [[TMP_3]])
+; MAX-NEXT: [[TMP_5:%.*]] = call float @llvm.round.f32(float [[TMP_4]])
+; MAX-NEXT: [[TMP_6:%.*]] = call float @llvm.trunc.f32(float [[TMP_5]])
+; MAX-NEXT: [[TMP_7:%.*]] = call float @llvm.fabs.f32(float [[TMP_6]])
+; MAX-NEXT: [[TMP_8:%.*]] = call float @llvm.copysign.f32(float [[TMP_7]], float [[ARG2:%.*]])
+; MAX-NEXT: [[TMP_9:%.*]] = call float @llvm.minnum.f32(float [[TMP_8]], float [[ARG2]])
+; MAX-NEXT: [[TMP_10:%.*]] = call float @llvm.maxnum.f32(float [[TMP_9]], float [[ARG2]])
+; MAX-NEXT: [[TMP_11:%.*]] = call float @llvm.minimum.f32(float [[TMP_10]], float [[ARG2]])
+; MAX-NEXT: [[TMP_12:%.*]] = call float @llvm.maximum.f32(float [[TMP_11]], float [[ARG2]])
+; MAX-NEXT: [[TMP_13:%.*]] = call float @llvm.powi.f32(float [[TMP_12]], i32 4)
+; MAX-NEXT: [[TMP_14:%.*]] = call float @llvm.roundeven.f32(float [[TMP_13]])
+; MAX-NEXT: br label [[FOR_HEAD:%.*]]
+; MAX: for.head:
+; MAX-NEXT: [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IND_NEW:%.*]], [[FOR_BODY:%.*]] ]
+; MAX-NEXT: [[CMP:%.*]] = icmp slt i32 [[IND]], 10
+; MAX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; MAX: for.body:
+; MAX-NEXT: call void @consume(float [[TMP_14]])
+; MAX-NEXT: [[IND_NEW]] = add i32 [[IND]], 1
+; MAX-NEXT: br label [[FOR_HEAD]]
+; MAX: exit:
+; MAX-NEXT: ret void
+;
 entry:
   br label %for.head
 