diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -39,81 +39,6 @@
 class TFModelEvaluatorImpl;
 class EvaluationResultImpl;
 
-/// Logging utility - given an ordered specification of features, and assuming
-/// a scalar reward, allow logging feature values and rewards, and then print
-/// as tf.train.SequenceExample text protobuf.
-/// The assumption is that, for an event to be logged (i.e. a set of feature
-/// values and a reward), the user calls the log* API for each feature exactly
-/// once, providing the index matching the position in the feature spec list
-/// provided at construction. The example assumes the first feature's element
-/// type is float, the second is int64, and the reward is float:
-///
-/// event 0:
-///   logFloatValue(0, ...)
-///   logInt64Value(1, ...)
-///   ...
-///   logFloatReward(...)
-/// event 1:
-///   logFloatValue(0, ...)
-///   logInt64Value(1, ...)
-///   ...
-///   logFloatReward(...)
-///
-/// At the end, call print to generate the protobuf.
-/// Alternatively, don't call logReward at the end of each event, just
-/// log{Float|Int32|Int64}FinalReward at the end.
-class LoggerDataImpl;
-class Logger final {
-public:
-  /// Construct a Logger. If IncludeReward is false, then logReward or
-  /// logFinalReward shouldn't be called, and the reward feature won't be
-  /// printed out.
-  /// NOTE: the FeatureSpecs are expected to be in the same order (i.e. have
-  /// corresponding indices) with any MLModelRunner implementations
-  /// corresponding to the model being trained/logged.
-  Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
-         const TensorSpec &RewardSpec, bool IncludeReward);
-
-  ~Logger();
-
-  void logFloatReward(float Value);
-  void logInt32Reward(int32_t Value);
-  void logInt64Reward(int64_t Value);
-
-  void logFloatFinalReward(float Value);
-  void logInt32FinalReward(int32_t Value);
-  void logInt64FinalReward(int64_t Value);
-
-  void logFloatValue(size_t FeatureID, const float *Value);
-  void logInt32Value(size_t FeatureID, const int32_t *Value);
-  void logInt64Value(size_t FeatureID, const int64_t *Value);
-
-  void logSpecifiedTensorValue(size_t FeatureID, const char *RawData);
-
-  // Warning! For int32_t, the return is set up for int64_t, so the caller needs
-  // to piecemeal cast their int32_t values.
-  // FIXME: let's drop int32_t support. While it's supported by evaluator, it's
-  // not supported by the tensorflow::SequenceExample proto. For small values,
-  // we can consider using bytes.
-  char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
-
-  // Flush the content of the log to the stream, clearing the stored data in
-  // the process.
-  void flush(std::string *Str);
-  void flush(raw_ostream &OS);
-
-  // Flush a set of logs that are produced from the same module, e.g.
-  // per-function regalloc traces, as a google::protobuf::Struct message.
-  static void flushLogs(raw_ostream &OS,
-                        const StringMap<std::unique_ptr<Logger>> &Loggers);
-
-private:
-  std::vector<LoggedFeatureSpec> FeatureSpecs;
-  TensorSpec RewardSpec;
-  const bool IncludeReward;
-  std::unique_ptr<LoggerDataImpl> LoggerData;
-};
-
 class TFModelEvaluator final {
 public:
   /// The result of a model evaluation. Handles the lifetime of the output
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
copy from llvm/include/llvm/Analysis/Utils/TFUtils.h
copy to llvm/include/llvm/Analysis/Utils/TrainingLogger.h
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
@@ -1,4 +1,4 @@
-//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
+//===- TrainingLogger.h - mlgo feature/reward logging ----------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
-#define LLVM_ANALYSIS_UTILS_TFUTILS_H
+#ifndef LLVM_ANALYSIS_UTILS_TRAININGLOGGER_H
+#define LLVM_ANALYSIS_UTILS_TRAININGLOGGER_H
 
 #include "llvm/Config/llvm-config.h"
 
@@ -22,23 +22,6 @@
 
 namespace llvm {
 
-/// Load a SavedModel, find the given inputs and outputs, and setup storage
-/// for input tensors. The user is responsible for correctly dimensioning the
-/// input tensors and setting their values before calling evaluate().
-/// To initialize:
-/// - construct the object
-/// - initialize the input tensors using initInput. Indices must correspond to
-///   indices in the InputNames used at construction.
-/// To use:
-/// - set input values by using getInput to get each input tensor, and then
-///   setting internal scalars, for all dimensions (tensors are row-major:
-///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
-/// - call evaluate. The input tensors' values are not consumed after this, and
-///   may still be read.
-/// - use the outputs in the output vector
-class TFModelEvaluatorImpl;
-class EvaluationResultImpl;
-
 /// Logging utility - given an ordered specification of features, and assuming
 /// a scalar reward, allow logging feature values and rewards, and then print
 /// as tf.train.SequenceExample text protobuf.
@@ -114,76 +97,7 @@
   std::unique_ptr<LoggerDataImpl> LoggerData;
 };
 
-class TFModelEvaluator final {
-public:
-  /// The result of a model evaluation. Handles the lifetime of the output
-  /// tensors, which means that their values need to be used before
-  /// the EvaluationResult's dtor is called.
-  class EvaluationResult {
-  public:
-    EvaluationResult(const EvaluationResult &) = delete;
-    EvaluationResult &operator=(const EvaluationResult &Other) = delete;
-
-    EvaluationResult(EvaluationResult &&Other);
-    EvaluationResult &operator=(EvaluationResult &&Other);
-
-    ~EvaluationResult();
-
-    /// Get a (const) pointer to the first element of the tensor at Index.
-    template <typename T> T *getTensorValue(size_t Index) {
-      return static_cast<T *>(getUntypedTensorValue(Index));
-    }
-
-    template <typename T> const T *getTensorValue(size_t Index) const {
-      return static_cast<const T *>(getUntypedTensorValue(Index));
-    }
-
-    /// Get a (const) pointer to the untyped data of the tensor.
-    void *getUntypedTensorValue(size_t Index);
-    const void *getUntypedTensorValue(size_t Index) const;
-
-  private:
-    friend class TFModelEvaluator;
-    EvaluationResult(std::unique_ptr<EvaluationResultImpl> Impl);
-    std::unique_ptr<EvaluationResultImpl> Impl;
-  };
-
-  TFModelEvaluator(StringRef SavedModelPath,
-                   const std::vector<TensorSpec> &InputSpecs,
-                   const std::vector<TensorSpec> &OutputSpecs,
-                   const char *Tags = "serve");
-  TFModelEvaluator(StringRef SavedModelPath,
-                   const std::vector<TensorSpec> &InputSpecs,
-                   function_ref<TensorSpec(size_t)> GetOutputSpecs,
-                   size_t OutputSpecsSize, const char *Tags = "serve");
-
-  ~TFModelEvaluator();
-  TFModelEvaluator(const TFModelEvaluator &) = delete;
-  TFModelEvaluator(TFModelEvaluator &&) = delete;
-
-  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
-  /// fails or the model is invalid, or an EvaluationResult otherwise. The
-  /// inputs are assumed to have been already provided via getInput(). When
-  /// returning None, it also invalidates this object.
-  Optional<EvaluationResult> evaluate();
-
-  /// Provides access to the input vector.
-  template <typename T> T *getInput(size_t Index) {
-    return static_cast<T *>(getUntypedInput(Index));
-  }
-
-  /// Returns true if the tensorflow model was loaded successfully, false
-  /// otherwise.
-  bool isValid() const { return !!Impl; }
-
-  /// Untyped access to input.
-  void *getUntypedInput(size_t Index);
-
-private:
-  std::unique_ptr<TFModelEvaluatorImpl> Impl;
-};
-
 } // namespace llvm
 
 #endif // LLVM_HAVE_TF_API
-#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H
+#endif // LLVM_ANALYSIS_UTILS_TRAININGLOGGER_H
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -130,10 +130,11 @@
   SyncDependenceAnalysis.cpp
   SyntheticCountsUtils.cpp
   TFUtils.cpp
-  TensorSpec.cpp
   TargetLibraryInfo.cpp
   TargetTransformInfo.cpp
+  TensorSpec.cpp
   Trace.cpp
+  TrainingLogger.cpp
   TypeBasedAliasAnalysis.cpp
   TypeMetadataUtils.cpp
   ScopedNoAliasAA.cpp
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
 #include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -22,23 +22,13 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include "google/protobuf/struct.pb.h"
-#include "google/protobuf/text_format.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_experimental.h"
-#include "tensorflow/core/example/example.pb.h"
 
 #include <cassert>
 #include <numeric>
 
 using namespace llvm;
 
-using google::protobuf::Message;
-using google::protobuf::TextFormat;
-
-static cl::opt<bool>
-    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
-                     cl::desc("Output textual (human-readable) protobuf."));
-
 namespace {
 
 using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
@@ -72,14 +62,6 @@
   return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
 }
 
-void serialize(const Message &SE, std::string *OutStr) {
-  if (ProtobufTextMode) {
-    TextFormat::PrintToString(SE, OutStr);
-  } else {
-    *OutStr = SE.SerializeAsString();
-  }
-}
-
 int getTFTypeIndex(TensorType TType) {
   switch (TType) {
   case TensorType::Double:
@@ -182,99 +164,6 @@
                                 const TensorSpec &OutputSpec);
 };
 
-class LoggerDataImpl {
-  const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
-  const TensorSpec RewardSpec;
-  const bool IncludeReward;
-
-  std::vector<tensorflow::FeatureList> FeatureLists;
-  tensorflow::FeatureList Reward;
-
-  bool isSelfConsistent(const tensorflow::SequenceExample &SE,
-                        size_t NrRecords) const {
-    bool Ret = true;
-    for (const auto &TSpecs : LoggedFeatureSpecs) {
-      const auto &Name = TSpecs.getLoggingName();
-      const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
-      if (NrRecords != static_cast<size_t>(FL.size())) {
-        dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
-               << NrRecords << " got " << FL.size() << "\n";
-        Ret = false;
-      }
-    }
-    if (IncludeReward && static_cast<size_t>(SE.feature_lists()
-                                                 .feature_list()
-                                                 .at(RewardSpec.name())
-                                                 .feature()
-                                                 .size()) != NrRecords) {
-      dbgs() << "[TF-UTILS]: reward is missing records.\n";
-      Ret = false;
-    }
-    return Ret;
-  }
-
-  void transferLog(tensorflow::SequenceExample &SE) {
-    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
-    if (IncludeReward)
-      (*FL)[RewardSpec.name()] = std::move(Reward);
-    assert(FeatureLists.size() == LoggedFeatureSpecs.size());
-    for (size_t I = 0; I < FeatureLists.size(); ++I) {
-      const auto &LFS = LoggedFeatureSpecs[I];
-      (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]);
-    }
-  }
-
-public:
-  LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
-                 const TensorSpec &RewardSpec, bool IncludeReward)
-      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
-        IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}
-
-  // flush the logged info to a stream and clear the log contents.
-  void flush(std::string *Str) {
-    size_t NrRecords = getNrRecords();
-    (void)NrRecords;
-    tensorflow::SequenceExample SE;
-    transferLog(SE);
-    assert(isSelfConsistent(SE, NrRecords));
-    serialize(SE, Str);
-  }
-
-  char *addNewTensor(size_t FeatureID) {
-    const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
-    if (Spec.isElementType<float>()) {
-      auto *RF = FeatureLists[FeatureID]
-                     .add_feature()
-                     ->mutable_float_list()
-                     ->mutable_value();
-      RF->Resize(Spec.getElementCount(), 0.0);
-      return reinterpret_cast<char *>(RF->mutable_data());
-    } else if (Spec.isElementType<int32_t>() ||
-               Spec.isElementType<int64_t>()) {
-      auto *RF = FeatureLists[FeatureID]
-                     .add_feature()
-                     ->mutable_int64_list()
-                     ->mutable_value();
-      RF->Resize(Spec.getElementCount(), 0);
-      return reinterpret_cast<char *>(RF->mutable_data());
-    }
-    llvm_unreachable("Unsupported tensor type.");
-  }
-
-  template <typename T> void logReward(T Value) {
-    assert(IncludeReward);
-    if (RewardSpec.isElementType<float>())
-      Reward.add_feature()->mutable_float_list()->add_value(Value);
-    else if (RewardSpec.isElementType<int32_t>() ||
-             RewardSpec.isElementType<int64_t>())
-      Reward.add_feature()->mutable_int64_list()->add_value(Value);
-    else
-      llvm_unreachable("Unsupported tensor type.");
-  }
-
-  size_t getNrRecords() const {
-    return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
-  }
-};
 } // namespace llvm
 
 TFModelEvaluatorImpl::TFModelEvaluatorImpl(
@@ -427,97 +316,4 @@
 TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
 TFModelEvaluator::~TFModelEvaluator() {}
 
-Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
-               const TensorSpec &RewardSpec, bool IncludeReward)
-    : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
-      IncludeReward(IncludeReward),
-      LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
-                                                  IncludeReward)) {}
-
-Logger::~Logger() {}
-
-#define LOG_REWARD(NAME, TYPE)                                                 \
-  void Logger::log##NAME##Reward(TYPE Value) {                                 \
-    assert(IncludeReward);                                                     \
-    LoggerData->logReward(Value);                                              \
-  }
-
-LOG_REWARD(Float, float)
-LOG_REWARD(Int32, int32_t)
-LOG_REWARD(Int64, int64_t)
-#undef LOG_REWARD
-
-#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
-  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
-    assert(RewardSpec.isElementType<TYPE>());                                  \
-    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
-      log##NAME##Reward(0);                                                    \
-    log##NAME##Reward(Value);                                                  \
-  }
-
-LOG_FINAL_REWARD(Float, float)
-LOG_FINAL_REWARD(Int32, int32_t)
-LOG_FINAL_REWARD(Int64, int64_t)
-#undef LOG_FINAL_REWARD
-
-void Logger::logFloatValue(size_t FeatureID, const float *Value) {
-  assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
-  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
-}
-
-void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
-  assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
-  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
-}
-
-void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
-  assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
-  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
-}
-
-void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
-  const auto &Spec = FeatureSpecs[FeatureID].Spec;
-  char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
-  if (Spec.isElementType<int32_t>())
-    for (size_t I = 0; I < Spec.getElementCount(); ++I)
-      (reinterpret_cast<int64_t *>(Buff))[I] =
-          static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
-  else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
-    std::memcpy(Buff, RawData,
-                Spec.getElementCount() * Spec.getElementByteSize());
-  else
-    llvm_unreachable("Unsupported tensor type");
-}
-
-char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
-  return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
-}
-
-void Logger::flush(std::string *Str) { LoggerData->flush(Str); }
-
-void Logger::flush(raw_ostream &OS) {
-  std::string Buff;
-  LoggerData->flush(&Buff);
-  OS << Buff;
-}
-
-void Logger::flushLogs(raw_ostream &OS,
-                       const StringMap<std::unique_ptr<Logger>> &Loggers) {
-  google::protobuf::Struct Msg;
-  for (const auto &NamedLogger : Loggers) {
-    tensorflow::SequenceExample SE;
-    const auto &Logger = NamedLogger.second;
-    std::string Unencoded;
-    if (Logger->LoggerData->getNrRecords() > 0)
-      Logger->flush(&Unencoded);
-
-    (*Msg.mutable_fields())[NamedLogger.first().str()]
-        .mutable_string_value()
-        ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded));
-  }
-
-  std::string OutStr;
-  serialize(Msg, &OutStr);
-  OS << OutStr;
-}
 #endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/TrainingLogger.cpp
@@ -0,0 +1,242 @@
+//===- TrainingLogger.cpp - mlgo feature/reward logging -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements logging infrastructure for extracting features and
+// rewards for mlgo policy training.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+#if defined(LLVM_HAVE_TF_API)
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#include "llvm/Support/Base64.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "google/protobuf/struct.pb.h"
+#include "google/protobuf/text_format.h"
+#include "tensorflow/core/example/example.pb.h"
+
+#include <cassert>
+#include <numeric>
+
+using namespace llvm;
+
+using google::protobuf::Message;
+using google::protobuf::TextFormat;
+
+static cl::opt<bool>
+    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
+                     cl::desc("Output textual (human-readable) protobuf."));
+
+namespace {
+
+void serialize(const Message &SE, std::string *OutStr) {
+  if (ProtobufTextMode) {
+    TextFormat::PrintToString(SE, OutStr);
+  } else {
+    *OutStr = SE.SerializeAsString();
+  }
+}
+} // namespace
+
+namespace llvm {
+
+class LoggerDataImpl {
+  const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
+  const TensorSpec RewardSpec;
+  const bool IncludeReward;
+
+  std::vector<tensorflow::FeatureList> FeatureLists;
+  tensorflow::FeatureList Reward;
+
+  bool isSelfConsistent(const tensorflow::SequenceExample &SE,
+                        size_t NrRecords) const {
+    bool Ret = true;
+    for (const auto &TSpecs : LoggedFeatureSpecs) {
+      const auto &Name = TSpecs.getLoggingName();
+      const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
+      if (NrRecords != static_cast<size_t>(FL.size())) {
+        dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
+               << NrRecords << " got " << FL.size() << "\n";
+        Ret = false;
+      }
+    }
+    if (IncludeReward && static_cast<size_t>(SE.feature_lists()
+                                                 .feature_list()
+                                                 .at(RewardSpec.name())
+                                                 .feature()
+                                                 .size()) != NrRecords) {
+      dbgs() << "[TF-UTILS]: reward is missing records.\n";
+      Ret = false;
+    }
+    return Ret;
+  }
+
+  void transferLog(tensorflow::SequenceExample &SE) {
+    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
+    if (IncludeReward)
+      (*FL)[RewardSpec.name()] = std::move(Reward);
+    assert(FeatureLists.size() == LoggedFeatureSpecs.size());
+    for (size_t I = 0; I < FeatureLists.size(); ++I) {
+      const auto &LFS = LoggedFeatureSpecs[I];
+      (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]);
+    }
+  }
+
+public:
+  LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
+                 const TensorSpec &RewardSpec, bool IncludeReward)
+      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
+        IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}
+
+  // flush the logged info to a stream and clear the log contents.
+  void flush(std::string *Str) {
+    size_t NrRecords = getNrRecords();
+    (void)NrRecords;
+    tensorflow::SequenceExample SE;
+    transferLog(SE);
+    assert(isSelfConsistent(SE, NrRecords));
+    serialize(SE, Str);
+  }
+
+  char *addNewTensor(size_t FeatureID) {
+    const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
+    if (Spec.isElementType<float>()) {
+      auto *RF = FeatureLists[FeatureID]
+                     .add_feature()
+                     ->mutable_float_list()
+                     ->mutable_value();
+      RF->Resize(Spec.getElementCount(), 0.0);
+      return reinterpret_cast<char *>(RF->mutable_data());
+    } else if (Spec.isElementType<int32_t>() ||
+               Spec.isElementType<int64_t>()) {
+      auto *RF = FeatureLists[FeatureID]
+                     .add_feature()
+                     ->mutable_int64_list()
+                     ->mutable_value();
+      RF->Resize(Spec.getElementCount(), 0);
+      return reinterpret_cast<char *>(RF->mutable_data());
+    }
+    llvm_unreachable("Unsupported tensor type.");
+  }
+
+  template <typename T> void logReward(T Value) {
+    assert(IncludeReward);
+    if (RewardSpec.isElementType<float>())
+      Reward.add_feature()->mutable_float_list()->add_value(Value);
+    else if (RewardSpec.isElementType<int32_t>() ||
+             RewardSpec.isElementType<int64_t>())
+      Reward.add_feature()->mutable_int64_list()->add_value(Value);
+    else
+      llvm_unreachable("Unsupported tensor type.");
+  }
+
+  size_t getNrRecords() const {
+    return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
+  }
+};
+} // namespace llvm
+
+Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
+               const TensorSpec &RewardSpec, bool IncludeReward)
+    : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
+      IncludeReward(IncludeReward),
+      LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
+                                                  IncludeReward)) {}
+
+Logger::~Logger() {}
+
+#define LOG_REWARD(NAME, TYPE)                                                 \
+  void Logger::log##NAME##Reward(TYPE Value) {                                 \
+    assert(IncludeReward);                                                     \
+    LoggerData->logReward(Value);                                              \
+  }
+
+LOG_REWARD(Float, float)
+LOG_REWARD(Int32, int32_t)
+LOG_REWARD(Int64, int64_t)
+#undef LOG_REWARD
+
+#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
+  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
+    assert(RewardSpec.isElementType<TYPE>());                                  \
+    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
+      log##NAME##Reward(0);                                                    \
+    log##NAME##Reward(Value);                                                  \
+  }
+
+LOG_FINAL_REWARD(Float, float)
+LOG_FINAL_REWARD(Int32, int32_t)
+LOG_FINAL_REWARD(Int64, int64_t)
+#undef LOG_FINAL_REWARD
+
+void Logger::logFloatValue(size_t FeatureID, const float *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
+  const auto &Spec = FeatureSpecs[FeatureID].Spec;
+  char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
+  if (Spec.isElementType<int32_t>())
+    for (size_t I = 0; I < Spec.getElementCount(); ++I)
+      (reinterpret_cast<int64_t *>(Buff))[I] =
+          static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
+  else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
+    std::memcpy(Buff, RawData,
+                Spec.getElementCount() * Spec.getElementByteSize());
+  else
+    llvm_unreachable("Unsupported tensor type");
+}
+
+char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
+  return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
+}
+
+void Logger::flush(std::string *Str) { LoggerData->flush(Str); }
+
+void Logger::flush(raw_ostream &OS) {
+  std::string Buff;
+  LoggerData->flush(&Buff);
+  OS << Buff;
+}
+
+void Logger::flushLogs(raw_ostream &OS,
+                       const StringMap<std::unique_ptr<Logger>> &Loggers) {
+  google::protobuf::Struct Msg;
+  for (const auto &NamedLogger : Loggers) {
+    tensorflow::SequenceExample SE;
+    const auto &Logger = NamedLogger.second;
+    std::string Unencoded;
+    if (Logger->LoggerData->getNrRecords() > 0)
+      Logger->flush(&Unencoded);
+
+    (*Msg.mutable_fields())[NamedLogger.first().str()]
+        .mutable_string_value()
+        ->append(ProtobufTextMode ? Unencoded : encodeBase64(Unencoded));
+  }
+
+  std::string OutStr;
+  serialize(Msg, &OutStr);
+  OS << OutStr;
+}
+#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -18,6 +18,7 @@
 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
 #endif
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -6,7 +6,7 @@
   TransformUtils
   )
 
-set(MLGO_TESTS TFUtilsTest.cpp)
+set(MLGO_TESTS TFUtilsTest.cpp TrainingLoggerTest.cpp)
 if (DEFINED LLVM_HAVE_TF_API)
   LIST(APPEND EXTRA_TESTS ${MLGO_TESTS})
 else()
diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@@ -7,9 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Utils/TFUtils.h"
-#include "google/protobuf/struct.pb.h"
-#include "tensorflow/core/example/example.pb.h"
-#include "tensorflow/core/example/feature.pb.h"
 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/AsmParser/Parser.h"
@@ -133,171 +130,3 @@
   for (auto I = 0; I < 2 * 5; ++I)
     EXPECT_FLOAT_EQ(F[I], 3.14 + I);
 }
-
-#define PROTO_CHECKER(FNAME, TYPE, INDEX, EXP)                                 \
-  do {                                                                         \
-    const auto &V = Expected.feature_lists()                                   \
-                        .feature_list()                                        \
-                        .at(FNAME)                                             \
-                        .feature(INDEX)                                        \
-                        .TYPE()                                                \
-                        .value();                                              \
-    for (auto I = 0; I < V.size(); ++I)                                        \
-      EXPECT_EQ(V.at(I), EXP[I]);                                              \
-  } while (false)
-
-TEST(TFUtilsTest, Logger) {
-  std::vector<LoggedFeatureSpec> Features;
-  Features.push_back(
-      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
-  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
-                      std::string("alternate_name")});
-
-  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
-  Logger L(Features, Rewards, true);
-  const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
-  const int64_t F01[]{2, 3};
-
-  L.logFloatValue(0, F00);
-  L.logInt64Value(1, F01);
-  L.logFloatReward(3.4);
-  const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
-  const int64_t F11[]{-2, -3};
-  L.logFloatValue(0, F10);
-  L.logInt64Value(1, F11);
-  L.logFloatReward(-3.0);
-  std::string Result;
-  raw_string_ostream OS(Result);
-  L.flush(OS);
-
-  tensorflow::SequenceExample Expected;
-  ASSERT_TRUE(Expected.ParseFromString(Result));
-  PROTO_CHECKER("the_float", float_list, 0, F00);
-  PROTO_CHECKER("the_float", float_list, 1, F10);
-  PROTO_CHECKER("alternate_name", int64_list, 0, F01);
-  PROTO_CHECKER("alternate_name", int64_list, 1, F11);
-  float R0[]{3.4};
-  float R1[]{-3.0};
-  PROTO_CHECKER("reward", float_list, 0, R0);
-  PROTO_CHECKER("reward", float_list, 1, R1);
-}
-
-TEST(TFUtilsTest, LoggerInt32FeaturesAndReward) {
-  std::vector<LoggedFeatureSpec> Features;
-  Features.push_back(
-      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
-  Features.push_back({TensorSpec::createSpec<int32_t>("the_int", {2}),
-                      std::string("alternate_name")});
-
-  auto Rewards = TensorSpec::createSpec<int32_t>("reward", {1});
-  Logger L(Features, Rewards, true);
-  const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
-  const int32_t F01[]{2, 3};
-
-  L.logFloatValue(0, F00);
-  L.logInt32Value(1, F01);
-  L.logInt32Reward(3);
-  const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
-  const int32_t F11[]{-2, -3};
-  L.logFloatValue(0, F10);
-  L.logInt32Value(1, F11);
-  L.logInt32Reward(-3);
-  std::string Result;
-  raw_string_ostream OS(Result);
-  L.flush(OS);
-
-  tensorflow::SequenceExample Expected;
-  ASSERT_TRUE(Expected.ParseFromString(Result));
-  PROTO_CHECKER("the_float", float_list, 0, F00);
-  PROTO_CHECKER("the_float", float_list, 1, F10);
-  PROTO_CHECKER("alternate_name", int64_list, 0, F01);
-  PROTO_CHECKER("alternate_name", int64_list, 1, F11);
-  int32_t R0[]{3};
-  int32_t R1[]{-3};
-  PROTO_CHECKER("reward", int64_list, 0, R0);
-  PROTO_CHECKER("reward", int64_list, 1, R1);
-}
-
-TEST(TFUtilsTest, LoggerNoReward) {
-  std::vector<LoggedFeatureSpec> Features;
-  Features.push_back(
-      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
-  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
-                      std::string("alternate_name")});
-
-  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
-  Logger L(Features, Rewards, false);
-  const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
-  const int64_t F01[]{2, 3};
-
-  L.logFloatValue(0, F00);
-  L.logInt64Value(1, F01);
-  const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
-  const int64_t F11[]{-2, -3};
-  L.logFloatValue(0, F10);
-  L.logInt64Value(1, F11);
-
-  std::string Result;
-  raw_string_ostream OS(Result);
-  L.flush(OS);
-  tensorflow::SequenceExample Expected;
-  ASSERT_TRUE(Expected.ParseFromString(Result));
-  PROTO_CHECKER("the_float", float_list, 0, F00);
-  PROTO_CHECKER("the_float", float_list, 1, F10);
-  PROTO_CHECKER("alternate_name", int64_list, 0, F01);
-  PROTO_CHECKER("alternate_name", int64_list, 1, F11);
-}
-
-TEST(TFUtilsTest, LoggerFinalReward) {
-  std::vector<LoggedFeatureSpec> Features;
-  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
-  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
-
-  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
-  Logger L(Features, Rewards, true);
-  for (int64_t I = 0; I < 3; ++I) {
-    float F = static_cast<float>(I);
-    L.logFloatValue(0, &F);
-    L.logInt64Value(1, &I);
-  }
-  L.logFloatFinalReward(3.14);
-  std::string Result;
-  raw_string_ostream OS(Result);
-  L.flush(OS);
-  const float Zero[]{0.0};
-  const float R[]{3.14};
-  tensorflow::SequenceExample Expected;
-  ASSERT_TRUE(Expected.ParseFromString(Result));
-  PROTO_CHECKER("reward", float_list, 0, Zero);
-  PROTO_CHECKER("reward", float_list, 1, Zero);
-  PROTO_CHECKER("reward", float_list, 2, R);
-}
-
-TEST(TFUtilsTest, LoggerGroup) {
-  std::vector<LoggedFeatureSpec> Features;
-  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
-  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
-
-  auto Rewards = TensorSpec::createSpec<float>("reward", {1});
-  StringMap<std::unique_ptr<Logger>> Loggers;
-  std::vector<std::string> Names{"a", "b"};
-  size_t Bump = 0;
-  for (auto Name : Names) {
-    auto L = std::make_unique<Logger>(Features, Rewards, true);
-    for (int64_t I = 0; I < 3; ++I) {
-      float F = static_cast<float>(I) + Bump;
-      L->logFloatValue(0, &F);
-      L->logInt64Value(1, &I);
-    }
-    L->logFloatFinalReward(3.14 + Bump);
-    Loggers.insert(std::make_pair(Name, std::move(L)));
-  }
-  std::string Result;
-  raw_string_ostream OS(Result);
-  Logger::flushLogs(OS, Loggers);
-  google::protobuf::Struct Expected;
-  ASSERT_TRUE(Expected.ParseFromString(Result));
-  EXPECT_EQ(Expected.fields_size(), 2);
-  EXPECT_TRUE(Expected.fields().contains("a"));
-  EXPECT_TRUE(Expected.fields().contains("b"));
-}
diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TrainingLoggerTest.cpp
copy from llvm/unittests/Analysis/TFUtilsTest.cpp
copy to llvm/unittests/Analysis/TrainingLoggerTest.cpp
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TrainingLoggerTest.cpp
@@ -1,4 +1,4 @@
-//===- TFUtilsTest.cpp - test for TFUtils ---------------------------------===//
+//===- TrainingLoggerTest.cpp - test for TrainingLogger -------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
 #include "google/protobuf/struct.pb.h"
 #include "tensorflow/core/example/example.pb.h"
 #include "tensorflow/core/example/feature.pb.h"
-#include "llvm/Analysis/ModelUnderTrainingRunner.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
@@ -26,113 +25,9 @@
 
 extern const char *TestMainArgv0;
 
-// NOTE! This test model is currently also used by test/Transforms/Inline/ML tests
+// NOTE! This test model is currently also used by test/Transforms/Inline/ML
+// tests
 //- relevant if updating this model.
-static std::string getModelPath() {
-  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
-  llvm::sys::path::append(InputsDir, "ir2native_x86_64_model");
-  return std::string(InputsDir);
-}
-
-// Test observable behavior when no model is provided.
-TEST(TFUtilsTest, NoModel) {
-  TFModelEvaluator Evaluator("", {}, {});
-  EXPECT_FALSE(Evaluator.isValid());
-}
-
-// Test we can correctly load a savedmodel and evaluate it.
-TEST(TFUtilsTest, LoadAndExecuteTest) {
-  // We use the ir2native model for test. We know it has one feature of
-  // dimension (1, 214)
-  const static int64_t KnownSize = 214;
-  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
-      "serving_default_input_1", {1, KnownSize})};
-  std::vector<TensorSpec> OutputSpecs{
-      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
-
-  TFModelEvaluator Evaluator(getModelPath(), InputSpecs, OutputSpecs);
-  EXPECT_TRUE(Evaluator.isValid());
-
-  int32_t *V = Evaluator.getInput<int32_t>(0);
-  // Fill it up with 1's, we know the output.
-  for (auto I = 0; I < KnownSize; ++I) {
-    V[I] = 1;
-  }
-  {
-    auto ER = Evaluator.evaluate();
-    EXPECT_TRUE(ER.hasValue());
-    float Ret = *ER->getTensorValue<float>(0);
-    EXPECT_EQ(static_cast<int64_t>(Ret), 80);
-    EXPECT_EQ(ER->getUntypedTensorValue(0),
-              reinterpret_cast<const void *>(ER->getTensorValue<float>(0)));
-  }
-  // The input vector should be unchanged
-  for (auto I = 0; I < KnownSize; ++I) {
-    EXPECT_EQ(V[I], 1);
-  }
-  // Zero-out the unused position '0' of the instruction histogram, which is
-  // after the first 9 calculated values. Should the the same result.
-  V[9] = 0;
-  {
-    auto ER = Evaluator.evaluate();
-    EXPECT_TRUE(ER.hasValue());
-    float Ret = *ER->getTensorValue<float>(0);
-    EXPECT_EQ(static_cast<int64_t>(Ret), 80);
-  }
-}
-
-// Test incorrect input setup
-TEST(TFUtilsTest, EvalError) {
-  // We use the ir2native model for test. We know it has one feature of
-  // dimension (1, 214)
-  const static int64_t KnownSize = 213;
-  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
-      "serving_default_input_1", {1, KnownSize})};
-  std::vector<TensorSpec> OutputSpecs{
-      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
-
-  TFModelEvaluator Evaluator(getModelPath(), InputSpecs, OutputSpecs);
-  EXPECT_TRUE(Evaluator.isValid());
-
-  int32_t *V = Evaluator.getInput<int32_t>(0);
-  // Fill it up with 1's, we know the output.
-  for (auto I = 0; I < KnownSize; ++I) {
-    V[I] = 1;
-  }
-  auto ER = Evaluator.evaluate();
-  EXPECT_FALSE(ER.hasValue());
-  EXPECT_FALSE(Evaluator.isValid());
-}
-
-TEST(TFUtilsTest, UnsupportedFeature) {
-  const static int64_t KnownSize = 214;
-  std::vector<TensorSpec> InputSpecs{
-      TensorSpec::createSpec<int32_t>("serving_default_input_1",
-                                      {1, KnownSize}),
-      TensorSpec::createSpec<float>("this_feature_does_not_exist", {2, 5})};
-
-  LLVMContext Ctx;
-  auto Evaluator = ModelUnderTrainingRunner::createAndEnsureValid(
-      Ctx, getModelPath(), "StatefulPartitionedCall", InputSpecs,
-      {LoggedFeatureSpec{
-          TensorSpec::createSpec<float>("StatefulPartitionedCall", {1}),
-          None}});
-  int32_t *V = Evaluator->getTensor<int32_t>(0);
-  // Fill it up with 1s, we know the output.
-  for (auto I = 0; I < KnownSize; ++I)
-    V[I] = 1;
-
-  float *F = Evaluator->getTensor<float>(1);
-  for (auto I = 0; I < 2 * 5; ++I)
-    F[I] = 3.14 + I;
-  float Ret = Evaluator->evaluate<float>();
-  EXPECT_EQ(static_cast<int64_t>(Ret), 80);
-  // The input vector should be unchanged
-  for (auto I = 0; I < KnownSize; ++I)
-    EXPECT_EQ(V[I], 1);
-  for (auto I = 0; I < 2 * 5; ++I)
-    EXPECT_FLOAT_EQ(F[I], 3.14 + I);
-}
 
 #define PROTO_CHECKER(FNAME, TYPE, INDEX, EXP)                                 \
   do {                                                                         \
@@ -146,7 +41,7 @@
     EXPECT_EQ(V.at(I), EXP[I]);                                                \
   } while (false)
 
-TEST(TFUtilsTest, Logger) {
+TEST(TrainingLoggerTest, Logger) {
   std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
       {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
@@ -182,7 +77,7 @@
   PROTO_CHECKER("reward", float_list, 1, R1);
 }
 
-TEST(TFUtilsTest, LoggerInt32FeaturesAndReward) {
+TEST(TrainingLoggerTest, LoggerInt32FeaturesAndReward) {
   std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
@@ -218,7 +113,7 @@
   PROTO_CHECKER("reward", int64_list, 1, R1);
 }
 
-TEST(TFUtilsTest, LoggerNoReward) {
+TEST(TrainingLoggerTest, LoggerNoReward) {
   std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
@@ -248,7 +143,7 @@
   PROTO_CHECKER("alternate_name", int64_list, 1, F11);
 }
 
-TEST(TFUtilsTest, LoggerFinalReward) {
+TEST(TrainingLoggerTest, LoggerFinalReward) {
   std::vector<LoggedFeatureSpec> Features;
   Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
   Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
@@ -273,7 +168,7 @@
   PROTO_CHECKER("reward", float_list, 2, R);
 }
 
-TEST(TFUtilsTest, LoggerGroup) {
+TEST(TrainingLoggerTest, LoggerGroup) {
   std::vector<LoggedFeatureSpec> Features;
  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
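
Reviewer note, not part of the patch: a minimal usage sketch of the API this change moves into TrainingLogger.h, mirroring the LoggerFinalReward and LoggerGroup unit tests above. The feature names and shapes are taken from those tests; the helper functions logOneTrace and logModuleTraces are hypothetical, and the code assumes a TF-enabled build (LLVM_HAVE_TF_API).

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>
#include <vector>

using namespace llvm;

// Log three events for one function; attribute the whole reward to the last
// event (log{Float|Int32|Int64}FinalReward backfills 0 for earlier events),
// then flush a single serialized tensorflow::SequenceExample.
static void logOneTrace(raw_ostream &OS) {
  // Feature specs must be in the same order as the FeatureIDs used below.
  std::vector<LoggedFeatureSpec> Features;
  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
  auto Reward = TensorSpec::createSpec<float>("reward", {1});

  Logger L(Features, Reward, /*IncludeReward=*/true);
  for (int64_t I = 0; I < 3; ++I) {
    float F = static_cast<float>(I);
    L.logFloatValue(0, &F); // FeatureID 0 matches Features[0]
    L.logInt64Value(1, &I); // FeatureID 1 matches Features[1]
  }
  L.logFloatFinalReward(3.14);
  L.flush(OS);
}

// Group per-function loggers from one module and emit them via flushLogs as a
// single google::protobuf::Struct message keyed by function name. Assumes
// Features has a float tensor at index 0 and an int64 tensor at index 1, as
// in logOneTrace above.
static void logModuleTraces(raw_ostream &OS,
                            const std::vector<LoggedFeatureSpec> &Features,
                            const TensorSpec &Reward) {
  StringMap<std::unique_ptr<Logger>> Loggers;
  std::vector<std::string> Names{"f", "g"};
  for (auto Name : Names) {
    auto L = std::make_unique<Logger>(Features, Reward, true);
    float F = 1.0;
    int64_t I = 42;
    L->logFloatValue(0, &F);
    L->logInt64Value(1, &I);
    L->logFloatFinalReward(1.0);
    Loggers.insert(std::make_pair(Name, std::move(L)));
  }
  Logger::flushLogs(OS, Loggers);
}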