Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1580,6 +1580,11 @@ bool IsSubExpr, bool AllowPredicates = false); + /// Analogous to ICmp version, but for fcmps. The fcmp is assumed to control + /// the exit and predicates are not yet supported. + ExitLimit computeExitLimitFromFCmp(const Loop *L, FCmpInst *ExitCond, + bool ExitIfTrue); + /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a switch with a single exiting case /// to ExitingBB. Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -7314,7 +7314,16 @@ return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit, /*AllowPredicates=*/true); } - + + // Try for some basic fcmp idioms. This is pretty weak as SCEV doesn't + // understand floating point and thus it has to be done entirely from IR + // pattern matching. + if (FCmpInst *FCmp = dyn_cast(ExitCond)) { + ExitLimit EL = + computeExitLimitFromFCmp(L, FCmp, ExitIfTrue); + if (EL.hasAnyInfo()) + return EL; + } // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to // preserve the CFG and is temporarily leaving constant conditions @@ -7431,6 +7440,128 @@ ExitCond->getOperand(1), L, OriginalPred); } +/// Return true if the given integer value is within the precisely +/// representable integer range of the given floating point type. Note that +/// this stricter than the integer being precisely representable as it requires +/// all integer values closer to zero to *also* be precisely representable. +static bool isWithinPreciseIntRange(Type *FPTy, int64_t Val) { + unsigned MantissaWidth = FPTy->getFPMantissaWidth(); + if (MantissaWidth > 63) + return false; + APInt MaxValue = APInt(64, 2 << MantissaWidth, true); + APInt MinValue = -MaxValue; + return MaxValue.sge(Val) && MinValue.sle(Val); +} + +/// Convert APF to an integer, if possible. +static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { + bool isExact = false; + // See if we can convert this to an int64_t + uint64_t UIntVal; + if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, + APFloat::rmTowardZero, &isExact) != APFloat::opOK || + !isExact) + return false; + IntVal = UIntVal; + return true; +} + +/// If the float argument represents a integer constant from within the integer +/// sub-range of the float's type, return it. Otherwise, return None. +Optional matchIntRangeFromFloat(Value *V) { + assert(V->getType()->isFloatingPointTy()); + auto *FPCon = dyn_cast(V); + if (!FPCon) + return None; + + int64_t Res; + if (!ConvertToSInt(FPCon->getValueAPF(), Res)) + return None; + if (!isWithinPreciseIntRange(V->getType(), Res)) + return None; + return Res; +} + + +ScalarEvolution::ExitLimit +ScalarEvolution::computeExitLimitFromFCmp(const Loop *L, + FCmpInst *ExitCond, + bool ExitIfTrue) { + + // TODO: Handle ExitIfTrue + if (ExitIfTrue) + return getCouldNotCompute(); + CmpInst::Predicate Pred = ExitCond->getPredicate(); + auto *LHS = dyn_cast(ExitCond->getOperand(0)); + if (!LHS) + return getCouldNotCompute(); + Value *RHS = ExitCond->getOperand(1); + Type *FPType = LHS->getType(); + // TODO: handle swap idiom + + // Match the pattern: + // %phi = phi [InitCon, %entry], [%inc, %backedge] + // ... + // %inc = fadd %phi, IncCon + // ... + // br (fcmp %inc, ExitCon), %exit, %header + // TODO: Support the pre-increment form as well. + auto ExitCon = matchIntRangeFromFloat(RHS); + if (!ExitCon) + return getCouldNotCompute(); + + if (LHS->getOpcode() != Instruction::FAdd) + return getCouldNotCompute(); + + auto IncCon = matchIntRangeFromFloat(LHS->getOperand(1)); + if (!IncCon) + return getCouldNotCompute(); + auto *PN = dyn_cast(LHS->getOperand(0)); + if (!PN || PN->getParent() != L->getHeader()) + return getCouldNotCompute(); + + unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); + unsigned BackEdge = IncomingEdge^1; + + if (PN->getIncomingValue(BackEdge) != LHS) + return getCouldNotCompute(); + + auto InitCon = matchIntRangeFromFloat(PN->getIncomingValue(IncomingEdge)); + if (!InitCon) + return getCouldNotCompute(); + + // Now that we've matched our pattern, try to compute an ExitLimit. For the + // moment, we only handle a single super simple case of OLT/ULT against an + // induction variable w/step one. This side steps all the complicated cases. + // Further cases can be added incrementally as needed. + + const int64_t InitValue = *InitCon; + const int64_t IncValue = *IncCon; + const int64_t ExitValue = *ExitCon; + + // Check to see if all values are within precisely representable integer + // sub-range. All we did above is prove that our values were precisely + // representable as floats which is NOT the same. + if (!isWithinPreciseIntRange(FPType, InitValue) || + !isWithinPreciseIntRange(FPType, IncValue) || + !isWithinPreciseIntRange(FPType, ExitValue)) + return getCouldNotCompute(); + + switch (Pred) { + default: + return getCouldNotCompute(); // Unhandled comparison + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + break; + } + + if (IncValue != 1 || InitValue >= ExitValue) + return getCouldNotCompute(); + + return getMinusSCEV(getConstant(APInt(64, ExitValue, true)), + getConstant(APInt(64, InitValue, true))); +} + ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, Index: test/Analysis/ScalarEvolution/trip-count-float.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count-float.ll +++ test/Analysis/ScalarEvolution/trip-count-float.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -S < %s -analyze -scalar-evolution | FileCheck %s + +define void @test() { +; CHECK-LABEL: 'test' +; CHECK-NEXT: Classifying expressions for: @test +; CHECK-NEXT: Determining loop execution counts for: @test +; CHECK-NEXT: Loop %bb8: backedge-taken count is 99999 +; CHECK-NEXT: Loop %bb8: max backedge-taken count is 99999 +; CHECK-NEXT: Loop %bb8: Predicated backedge-taken count is 99999 +; CHECK-NEXT: Predicates: +; CHECK: Loop %bb8: Trip multiple is 100000 +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp olt double %tmp13, 9.999900e+04 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +define void @test2() { +; CHECK-LABEL: 'test2' +; CHECK-NEXT: Classifying expressions for: @test2 +; CHECK-NEXT: Determining loop execution counts for: @test2 +; CHECK-NEXT: Loop %bb8: backedge-taken count is 2000 +; CHECK-NEXT: Loop %bb8: max backedge-taken count is 2000 +; CHECK-NEXT: Loop %bb8: Predicated backedge-taken count is 2000 +; CHECK-NEXT: Predicates: +; CHECK: Loop %bb8: Trip multiple is 2001 +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp olt double %tmp13, 2.0000e+03 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +define void @test3() { +; CHECK-LABEL: 'test3' +; CHECK-NEXT: Classifying expressions for: @test3 +; CHECK-NEXT: Determining loop execution counts for: @test3 +; CHECK-NEXT: Loop %bb8: backedge-taken count is 8888 +; CHECK-NEXT: Loop %bb8: max backedge-taken count is 8888 +; CHECK-NEXT: Loop %bb8: Predicated backedge-taken count is 8888 +; CHECK-NEXT: Predicates: +; CHECK: Loop %bb8: Trip multiple is 8889 +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp ult double %tmp13, 8.888e+03 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +define void @test4() { +; CHECK-LABEL: 'test4' +; CHECK-NEXT: Classifying expressions for: @test4 +; CHECK-NEXT: Determining loop execution counts for: @test4 +; CHECK-NEXT: Loop %bb8: backedge-taken count is 1 +; CHECK-NEXT: Loop %bb8: max backedge-taken count is 1 +; CHECK-NEXT: Loop %bb8: Predicated backedge-taken count is 1 +; CHECK-NEXT: Predicates: +; CHECK: Loop %bb8: Trip multiple is 2 +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp ult double %tmp13, 1.e+00 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +define void @test5() { +; CHECK-LABEL: 'test5' +; CHECK-NEXT: Classifying expressions for: @test5 +; CHECK-NEXT: Determining loop execution counts for: @test5 +; CHECK-NEXT: Loop %bb8: backedge-taken count is 0 +; CHECK-NEXT: Loop %bb8: max backedge-taken count is 0 +; CHECK-NEXT: Loop %bb8: Predicated backedge-taken count is 0 +; CHECK-NEXT: Predicates: +; CHECK: Loop %bb8: Trip multiple is 1 +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp ult double %tmp13, 0.e+00 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +; Don't yet handle equalities +define void @todo1() { +; CHECK-LABEL: 'todo1' +; CHECK-NEXT: Classifying expressions for: @todo1 +; CHECK-NEXT: Determining loop execution counts for: @todo1 +; CHECK-NEXT: Loop %bb8: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable predicated backedge-taken count. +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp une double %tmp13, 9.999e+03 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +; Don't yet handle non-one strides +define void @todo2() { +; CHECK-LABEL: 'todo2' +; CHECK-NEXT: Classifying expressions for: @todo2 +; CHECK-NEXT: Determining loop execution counts for: @todo2 +; CHECK-NEXT: Loop %bb8: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable predicated backedge-taken count. +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 2.000000e+00 + %tmp14 = fcmp ult double %tmp13, 9.999e+03 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +define void @todo3() { +; CHECK-LABEL: 'todo3' +; CHECK-NEXT: Classifying expressions for: @todo3 +; CHECK-NEXT: Determining loop execution counts for: @todo3 +; CHECK-NEXT: Loop %bb8: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable predicated backedge-taken count. +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 1.000000e+03, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, -1.000000e+00 + %tmp14 = fcmp ugt double %tmp13, 0.0e+00 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +; ExitCount way out of bounds +define void @neg1() { +; CHECK-LABEL: 'neg1' +; CHECK-NEXT: Classifying expressions for: @neg1 +; CHECK-NEXT: Determining loop execution counts for: @neg1 +; CHECK-NEXT: Loop %bb8: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable predicated backedge-taken count. +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 1.000000e+00 + %tmp14 = fcmp olt double %tmp13, 9.999900e+40 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +} + +; Step unanalyzeable large +define void @neg2() { +; CHECK-LABEL: 'neg2' +; CHECK-NEXT: Classifying expressions for: @neg2 +; CHECK-NEXT: Determining loop execution counts for: @neg2 +; CHECK-NEXT: Loop %bb8: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %bb8: Unpredictable predicated backedge-taken count. +; +bb4: + br label %bb8 + +bb8: + %tmp11 = phi double [ 0.000000e+00, %bb4 ], [ %tmp13, %bb8 ] + %tmp13 = fadd double %tmp11, 9.999900e+40 + %tmp14 = fcmp olt double %tmp13, 9.999900e+400 + br i1 %tmp14, label %bb8, label %bb6 + +bb6: + ret void +}