Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -142,7 +142,7 @@ void disableSROA(Value *V); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); + void accumulateCost(int InstructionCost); bool isGEPFree(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); @@ -208,6 +208,12 @@ bool visitCleanupReturnInst(CleanupReturnInst &RI); bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); + /// Simplify \p I if its operands are constants and update SimplifiedValues. + /// Transfom is a lambda specific to instruction type that evaluates the + /// instruction when all the operands are constants. + bool + simplifyInstruction(Instruction &I, + function_ref)> Evaluate); public: CallAnalyzer(const TargetTransformInfo &TTI, @@ -298,17 +304,6 @@ SROACostSavings += InstructionCost; } -/// \brief Check whether a GEP's indices are all constant. -/// -/// Respects any simplified values known during the analysis of this callsite. -bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { - for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) - if (!isa(*I) && !SimplifiedValues.lookup(*I)) - return false; - - return true; -} - /// \brief Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any @@ -404,6 +399,10 @@ return true; } +void CallAnalyzer::accumulateCost(int InstructionCost) { + Cost += InstructionCost; +} + bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { Value *SROAArg; DenseMap::iterator CostIt; @@ -438,6 +437,14 @@ } } + // Lambda to check whether a GEP's indices are all constant. + auto isGEPOffsetConstant = [&](GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa(*I) && !SimplifiedValues.lookup(*I)) + return false; + return true; + }; + if (isGEPOffsetConstant(I)) { if (SROACandidate) SROAArgValues[&I] = SROAArg; @@ -452,16 +459,32 @@ return isGEPFree(I); } +bool CallAnalyzer::simplifyInstruction( + Instruction &I, function_ref)> Evaluate) { + Constant *COps[2]; + int N = 0; + for (Value *Op : I.operands()) { + Constant *COp = dyn_cast(Op); + if (!COp) + COp = SimplifiedValues.lookup(Op); + if (!COp) + return false; + COps[N++] = COp; + } + if (auto *C = Evaluate(COps)) { + SimplifiedValues[&I] = C; + return true; + } + return false; +} + bool CallAnalyzer::visitBitCast(BitCastInst &I) { // Propagate constants through bitcasts. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getBitCast(COps[0], I.getType()); + }; + if (simplifyInstruction(I, Evaluate)) + return true; // Track base/offsets through casts std::pair BaseAndOffset = @@ -482,14 +505,11 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getPtrToInt(COps[0], I.getType()); + }; + if (simplifyInstruction(I, Evaluate)) + return true; // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. @@ -519,14 +539,11 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getIntToPtr(COps[0], I.getType()); + }; + if (simplifyInstruction(I, Evaluate)) + return true; // Track base/offset pairs when round-tripped through a pointer without // modifications provided the integer is not too large. @@ -550,14 +567,11 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType()); + }; + if (simplifyInstruction(I, Evaluate)) + return true; // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. disableSROA(I.getOperand(0)); @@ -567,16 +581,12 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Value *Operand = I.getOperand(0); - Constant *COp = dyn_cast(Operand); - if (!COp) - COp = SimplifiedValues.lookup(Operand); - if (COp) { + auto Evaluate = [&](ArrayRef COps) -> Constant * { const DataLayout &DL = F.getParent()->getDataLayout(); - if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) { - SimplifiedValues[&I] = C; - return true; - } - } + return ConstantFoldInstOperands(&I, COps[0], DL); + }; + if (simplifyInstruction(I, Evaluate)) + return true; // Disable any SROA on the argument to arbitrary unary operators. disableSROA(Operand); @@ -668,8 +678,8 @@ if (!Caller->optForMinSize()) { if (Callee.hasFnAttribute(Attribute::InlineHint)) Threshold = MaxIfValid(Threshold, Params.HintThreshold); + BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; if (PSI) { - BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; if (PSI->isHotCallSite(CS, CallerBFI)) { DEBUG(dbgs() << "Hot callsite.\n"); Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); @@ -697,20 +707,11 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast(LHS)) { - if (Constant *CRHS = dyn_cast(RHS)) - if (Constant *C = - ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - } + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]); + }; + if (simplifyInstruction(I, Evaluate)) + return true; if (I.getOpcode() == Instruction::FCmp) return false; @@ -788,24 +789,19 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - const DataLayout &DL = F.getParent()->getDataLayout(); - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - Value *SimpleV = nullptr; - if (auto FI = dyn_cast(&I)) - SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); - else - SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + auto Evaluate = [&](ArrayRef COps) -> Constant * { + Value *SimpleV = nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); + if (auto FI = dyn_cast(&I)) + SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1], + FI->getFastMathFlags(), DL); + else + SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL); + return dyn_cast_or_null(SimpleV); + }; - if (Constant *C = dyn_cast_or_null(SimpleV)) { - SimplifiedValues[&I] = C; + if (simplifyInstruction(I, Evaluate)) return true; - } // Disable any SROA on arguments to arbitrary, unsimplified binary operators. disableSROA(LHS); @@ -846,13 +842,11 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { // Constant folding for extract value is trivial. - Constant *C = dyn_cast(I.getAggregateOperand()); - if (!C) - C = SimplifiedValues.lookup(I.getAggregateOperand()); - if (C) { - SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getExtractValue(COps[0], I.getIndices()); + }; + if (simplifyInstruction(I, Evaluate)) return true; - } // SROA can look through these but give them a cost. return false; @@ -860,17 +854,13 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { // Constant folding for insert value is trivial. - Constant *AggC = dyn_cast(I.getAggregateOperand()); - if (!AggC) - AggC = SimplifiedValues.lookup(I.getAggregateOperand()); - Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); - if (!InsertedC) - InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); - if (AggC && InsertedC) { - SimplifiedValues[&I] = - ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices()); + auto Evaluate = [&](ArrayRef COps) -> Constant * { + return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0], + /*InsertedValueOperand*/ COps[1], + I.getIndices()); + }; + if (simplifyInstruction(I, Evaluate)) return true; - } // SROA can look through these but give them a cost. return false; @@ -889,7 +879,6 @@ // inside of instsimplify, directly constant fold calls here. if (!canConstantFoldCallTo(F)) return false; - // Try to re-map the arguments to constants. SmallVector ConstantArgs; ConstantArgs.reserve(CS.arg_size()); @@ -935,7 +924,7 @@ case Intrinsic::load_relative: // This is normally lowered to 4 LLVM instructions. - Cost += 3 * InlineConstants::InstrCost; + accumulateCost(3 * InlineConstants::InstrCost); return false; case Intrinsic::memset: @@ -959,12 +948,12 @@ if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + accumulateCost(CS.arg_size() * InlineConstants::InstrCost); // Everything other than inline ASM will also have a significant cost // merely from making the call. if (!isa(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; + accumulateCost(InlineConstants::CallPenalty); } return Base::visitCallSite(CS); @@ -976,7 +965,7 @@ // First, pay the price of the argument setup. We account for the average // 1 instruction per call argument setup here. - Cost += CS.arg_size() * InlineConstants::InstrCost; + accumulateCost(CS.arg_size() * InlineConstants::InstrCost); // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. @@ -1024,26 +1013,32 @@ // branches. if (isa(SI.getCondition())) return true; + + auto SwitchInstCost = [](SwitchInst &SI) { + // We need to compute a cost proportional to the number of + // distinct successor blocks. This fan-out in the CFG cannot be represented + // for free even if we can represent the core switch as a jumptable that + // takes a single instruction. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. + SmallPtrSet SuccessorBlocks; + SuccessorBlocks.insert(SI.getDefaultDest()); + for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) + SuccessorBlocks.insert(I.getCaseSuccessor()); + + // Add cost corresponding to the number of distinct destinations. The first + // we model as free because of fallthrough. + return (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + }; + if (Value *V = SimplifiedValues.lookup(SI.getCondition())) if (isa(V)) return true; - // Otherwise, we need to accumulate a cost proportional to the number of - // distinct successor blocks. This fan-out in the CFG cannot be represented - // for free even if we can represent the core switch as a jumptable that - // takes a single instruction. - // - // NB: We convert large switches which are just used to initialize large phi - // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent - // inlining those. It will prevent inlining in cases where the optimization - // does not (yet) fire. - SmallPtrSet SuccessorBlocks; - SuccessorBlocks.insert(SI.getDefaultDest()); - for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) - SuccessorBlocks.insert(I.getCaseSuccessor()); - // Add cost corresponding to the number of distinct destinations. The first - // we model as free because of fallthrough. - Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + accumulateCost(SwitchInstCost(SI)); return false; } @@ -1144,7 +1139,7 @@ if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive || hasSoftFloatAttr) - Cost += InlineConstants::CallPenalty; + accumulateCost(InlineConstants::CallPenalty); } // If the instruction simplified to a constant, there is no cost to this @@ -1155,7 +1150,7 @@ if (Base::visit(&*I)) ++NumInstructionsSimplified; else - Cost += InlineConstants::InstrCost; + accumulateCost(InlineConstants::InstrCost); // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || @@ -1278,16 +1273,16 @@ // DataLayout. NumStores = std::min(NumStores, 8U); - Cost -= 2 * NumStores * InlineConstants::InstrCost; + accumulateCost(-2 * NumStores * InlineConstants::InstrCost); } else { // For non-byval arguments subtract off one instruction per call // argument. - Cost -= InlineConstants::InstrCost; + accumulateCost(-InlineConstants::InstrCost); } } // The call instruction also disappears after inlining. - Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty; - + accumulateCost(-InlineConstants::InstrCost - InlineConstants::CallPenalty); + // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = @@ -1298,7 +1293,7 @@ // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; + accumulateCost(InlineConstants::ColdccPenalty); // Check if we're done. This can happen due to bonuses and penalties. if (Cost > Threshold)