Index: C:/LLVM/llvm/include/llvm/TableGen/Record.h =================================================================== --- C:/LLVM/llvm/include/llvm/TableGen/Record.h +++ C:/LLVM/llvm/include/llvm/TableGen/Record.h @@ -1307,9 +1307,12 @@ } bool isSubClassOf(StringRef Name) const { - for (const auto &SCPair : SuperClasses) + for (const auto &SCPair : SuperClasses) { + // TODO: getNameInitAsString copy constructs a new std::string, + // yet we're only using it in this comparison. Is there a better way? if (SCPair.first->getNameInitAsString() == Name) return true; + } return false; } Index: C:/LLVM/llvm/utils/TableGen/CodeGenInstruction.cpp =================================================================== --- C:/LLVM/llvm/utils/TableGen/CodeGenInstruction.cpp +++ C:/LLVM/llvm/utils/TableGen/CodeGenInstruction.cpp @@ -49,6 +49,11 @@ unsigned MIOperandNo = 0; std::set OperandNames; + + // TODO: can we reserve space for e elements in OperandList? + // Profiling a debug build with ETW revealed that the emplace_back + // at the very bottom of this loop can take up to 227ms, largely + // spent in reallocation. for (unsigned i = 0, e = InDI->getNumArgs()+OutDI->getNumArgs(); i != e; ++i){ Init *ArgInit; std::string ArgName; Index: C:/LLVM/llvm/utils/TableGen/CodeGenTarget.cpp =================================================================== --- C:/LLVM/llvm/utils/TableGen/CodeGenTarget.cpp +++ C:/LLVM/llvm/utils/TableGen/CodeGenTarget.cpp @@ -441,12 +441,24 @@ std::vector I = RC.getAllDerivedDefinitions("Intrinsic"); std::vector Result; + + // Reserve enough capacity for the worst case (every record kept) + // in the following loop. Profiling a debug build with ETW + // revealed that push_back in the loop could take up to 280ms, + // largely spent in reallocation. 
+ Result.reserve(I.size()); for (unsigned i = 0, e = I.size(); i != e; ++i) { bool isTarget = I[i]->getValueAsBit("isTarget"); if (isTarget == TargetOnly) Result.push_back(CodeGenIntrinsic(I[i])); } + + // TODO: does this lambda need to accept parameters by value? + // Profiling a debug build with ETW revealed that each call of + // std::_Unguarded_partition (an internal function, called by std::sort), + // can spend hundreds of milliseconds inside this lambda calling + // ~CodeGenIntrinsic(), which is quite wasteful. std::sort(Result.begin(), Result.end(), [](CodeGenIntrinsic LHS, CodeGenIntrinsic RHS) { return LHS.Name < RHS.Name;