Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -145,6 +145,7 @@ bool isValidRewrite(Value *FromVal, Value *ToVal); + bool optimizeFCmp(FCmpInst *Compare); bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -302,6 +303,15 @@ /// for(int i = 0; i < 10000; ++i) /// bar((double)i); bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { + // Note: The code below is unneccessarily specific. It is essentially + // implementing three distinct parts. + // 1) Compute the maximum iteration count from the fcmp/floating IV + // 2) Replace the float IV with an equivalent integer IV + // 3) Replace the fcmp with an icmp. + // We should really split these pieces apart. (1) belongs inside of SCEV's + // exit count reasoning, (2) can be done based on result of (1), and so can + // (3). Doing this would allow us to greatly generalize the pattern matching + // and handle a much broader class of floating point induction variables. unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; @@ -495,6 +505,80 @@ return true; } +/// Return a constant range which describes the range of integers which can be +/// precisely represented without loss in the given FP type. The bitwidth of +/// the returned constant range is the bitwidth of the type queried. Range +/// returned may be smaller than the maximally allowed range for some types. +ConstantRange getPreciseIntRange(Type *FPTy) { + uint64_t BitWidth = FPTy->getPrimitiveSizeInBits(); + if (!FPTy->isFloatTy() && !FPTy->isDoubleTy()) + return ConstantRange::getEmpty(BitWidth); + unsigned MantissaWidth = FPTy->getFPMantissaWidth(); + assert(MantissaWidth <= 64); + APInt MaxValue = APInt(BitWidth, 2 << MantissaWidth, true); + APInt MinValue = -MaxValue; + return ConstantRange(MinValue, MaxValue); +} + +/// Given an (fcmp pred sitofp(x), y) where we know sitofp(x) is a precisely +/// representable integer value, replace fcmp with an icmp if possible. +bool IndVarSimplify::optimizeFCmp(FCmpInst *FCmp) { + // If it isn't a comparison with an integer-as-fp (the exit value), we can't + // transform it. + ConstantFP *ExitValueVal = dyn_cast(FCmp->getOperand(1)); + int64_t ExitValue; + if (ExitValueVal == nullptr || + !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) + return false; + + // TODO: Any RHS fp constant is guaranteed not to be Nan. unordered to + // ordered forms as a canonicalization. + + Type *FPTy = ExitValueVal->getType(); + unsigned BitWidth = FPTy->getPrimitiveSizeInBits(); + + if (!getPreciseIntRange(FPTy).contains(APInt(BitWidth, ExitValue, true))) + return false; + + auto *LHS = dyn_cast(FCmp->getOperand(0)); + if (!LHS) + return false; + + auto *IntOp = LHS->getOperand(0); + ConstantRange CR = SE->getSignedRange(SE->getSCEV(IntOp)); + if (!getPreciseIntRange(FPTy).contains(CR)) + return false; + + // Find new predicate for integer comparison. + CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; + switch (FCmp->getPredicate()) { + default: return false; // Unknown comparison. + case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; + } + + auto *ICmp = new ICmpInst(FCmp, NewPred, IntOp, + ConstantInt::get(IntOp->getType(), ExitValue), + ""); + + // Delete the old floating point exit comparison. The branch starts using the + // new comparison. + ICmp->takeName(FCmp); + FCmp->replaceAllUsesWith(ICmp); + RecursivelyDeleteTriviallyDeadInstructions(FCmp, TLI); + return true; +} + bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // First step. Check to see if there are any floating-point recurrences. // If there are, change them into integer recurrences, permitting analysis by @@ -510,6 +594,26 @@ if (PHINode *PN = dyn_cast_or_null(&*PHIs[i])) Changed |= handleFloatingPointIV(L, PN); + // Find any fcmp's controlling loop exits, and see if we can replace them + // with icmps given known range information. This needs to live here rather + // than SimplifyIndVars as our sitofp may have many uses and we need to avoid + // walking an unbounded size use list. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBB : ExitingBlocks) { + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + continue; + + FCmpInst *FCmp = dyn_cast(BI->getCondition()); + if (!FCmp) + continue; + + // Try to convert fcmps to icmps if possible + Changed = optimizeFCmp(FCmp); + } + // If the loop previously had floating-point IV, ScalarEvolution // may not have been able to compute a trip count. Now that we've done some // re-writing, the trip count may be computable. Index: test/Transforms/IndVarSimplify/floating-point-iv.ll =================================================================== --- test/Transforms/IndVarSimplify/floating-point-iv.ll +++ test/Transforms/IndVarSimplify/floating-point-iv.ll @@ -268,13 +268,12 @@ ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], 20000 -; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[RETURN:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #0 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[IV]], 10000 ; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void @@ -310,8 +309,7 @@ ; CHECK-NEXT: [[IV_FP:%.*]] = sitofp i64 [[IV]] to double ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo(double [[IV_FP]]) #0 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt double [[IV_FP]], 1.000000e+04 -; CHECK-NEXT: br i1 [[CMP2]], label [[BB]], label [[RETURN]] +; CHECK-NEXT: br i1 true, label [[BB]], label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void ;