diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -788,8 +788,23 @@
 set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install")
 if (TENSORFLOW_C_LIB_PATH)
   find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib NO_DEFAULT_PATH REQUIRED)
+  # Currently, the protobuf headers are distributed with the pip package that corresponds to the version
+  # of the C API library.
+  find_library(tensorflow_fx tensorflow_framework PATHS ${TENSORFLOW_C_LIB_PATH}/lib NO_DEFAULT_PATH REQUIRED)
   set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available")
   include_directories(${TENSORFLOW_C_LIB_PATH}/include)
+  execute_process(COMMAND
+    ${Python3_EXECUTABLE} "-m" "pip" "show" "tensorflow"
+    OUTPUT_VARIABLE TF_PIP_OUT)
+  if ("${TF_PIP_OUT}" STREQUAL "")
+    message(FATAL_ERROR "Tensorflow pip package is also required for 'development' mode (protobuf headers)")
+  endif()
+  string(REGEX MATCH "Location: ([^\n]*\n)" TF_PIP_LOC "${TF_PIP_OUT}")
+  string(REPLACE "Location: " "" TF_PIP ${TF_PIP_LOC})
+  set(TF_PROTO_HEADERS ${TF_PIP}/include)
+  include_directories(${TF_PROTO_HEADERS})
+  add_definitions("-DGOOGLE_PROTOBUF_NO_RTTI")
+  add_definitions("-D_GLIBCXX_USE_CXX11_ABI=0")
 endif()
 
 # For up-to-date instructions for installing the Tensorflow dependency, refer to
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -27,7 +27,7 @@
   endif()
 
   if (DEFINED LLVM_HAVE_TF_API)
-    list(APPEND MLLinkDeps ${tensorflow_c_api})
+    list(APPEND MLLinkDeps ${tensorflow_c_api} ${tensorflow_fx})
   endif()
 endif()
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -22,14 +23,19 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include "google/protobuf/text_format.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_api_experimental.h"
-
+#include "tensorflow/core/example/example.pb.h"
 #include <cassert>
 #include <numeric>
 
 using namespace llvm;
 
+static cl::opt<bool>
+    ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
+                     cl::desc("Output textual (human-readable) protobuf."));
+
 namespace {
 
 using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
@@ -65,85 +71,52 @@
   return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
 }
 
-/// Write the values of one tensor as a list.
-template <typename T>
-void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
-                       size_t ElemCount) {
-  OutFile << "[";
-  const T *TypedData = reinterpret_cast<const T *>(TensorData);
-  ListSeparator LS;
-  for (size_t I = 0; I < ElemCount; ++I)
-    OutFile << LS << TypedData[I];
-  OutFile << "]";
-}
-
 /// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
 /// The tensors are assumed to be stored contiguously, in row-major format,
 /// in the TensorData buffer. Each tensor has the shape given by Spec. The
 /// feature name in the output is either the provided LoggingName, if
 /// specified, otherwise it's the name of the tensor (as given by Spec).
-void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
+void writeRawTensorsAsFeatureLists(tensorflow::FeatureLists *FE,
                                    const LoggedFeatureSpec &LoggedSpec,
                                    const char *TensorData, size_t TensorCount,
                                    bool FinalReward = false) {
-  const char *FieldName = "<invalid>";
-  std::function<void(const char *)> ValueWriter;
   const auto &Spec = LoggedSpec.Spec;
   // The 'Feature' protobuf only has 3 possible fields: float_list,
   // int64_list, or bytes_list, so we capture int32 values as int64. We don't
   // support any other types.
-  if (Spec.isElementType<int64_t>()) {
-    FieldName = "int64_list";
-    ValueWriter = [&](const char *Data) {
-      writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
-    };
-  } else if (Spec.isElementType<int32_t>()) {
-    FieldName = "int64_list";
-    ValueWriter = [&](const char *Data) {
-      writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
-    };
-
-  } else if (Spec.isElementType<float>()) {
-    FieldName = "float_list";
-    ValueWriter = [&](const char *Data) {
-      writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
-    };
-
-  } else {
-    llvm_unreachable("Unsupported tensor type.");
-  }
-
-  OutFile << "  feature_list: {\n";
-  OutFile << "    key: "
-          << "\""
-          << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
-          << "\" ";
-  OutFile << "value: {\n";
-  size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();
-
-  auto WriteFeatureProto = [&](const char *P) {
-    OutFile << "      feature: { " << FieldName << ": { value: ";
-    ValueWriter(P);
-    OutFile << " } }\n";
-  };
+  tensorflow::FeatureList &FL = (*FE->mutable_feature_list())[(
+      LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())];
 
   const char *CurrentTensor = TensorData;
-  static int64_t Zero = 0;
-  // Write all but the last value. If this is the final reward, don't increment
-  // the CurrentTensor, and just write 0.
-  for (size_t I = 0; I < TensorCount - 1; ++I) {
-    if (FinalReward)
-      WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
-    else {
-      WriteFeatureProto(CurrentTensor);
-      CurrentTensor += TensorByteSize;
+  const size_t TensorByteSize =
+      Spec.getElementCount() * Spec.getElementByteSize();
+  const size_t ElemCount = Spec.getElementCount();
+  for (size_t E = 0; E < TensorCount; ++E) {
+    const bool ShouldWrite = E + 1 == TensorCount || !FinalReward;
+
+    if (Spec.isElementType<int64_t>()) {
+      auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
+      MF->Resize(ElemCount, 0);
+      if (ShouldWrite)
+        memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
+    } else if (Spec.isElementType<int32_t>()) {
+      auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
+      MF->Resize(ElemCount, 0);
+      if (ShouldWrite) {
+        const int32_t *TD = reinterpret_cast<const int32_t *>(CurrentTensor);
+        for (size_t I = 0; I < ElemCount; ++I)
+          (*MF)[I] = TD[I];
+      }
+    } else if (Spec.isElementType<float>()) {
+      auto *MF = FL.add_feature()->mutable_float_list()->mutable_value();
+      MF->Resize(ElemCount, 0.0);
+      if (ShouldWrite)
+        memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
+    } else {
+      llvm_unreachable("Unsupported tensor type.");
     }
+    CurrentTensor += TensorByteSize;
   }
-
-  WriteFeatureProto(CurrentTensor);
-
-  OutFile << "    }\n";
-  OutFile << "  }\n";
 }
 
 } // namespace
@@ -475,6 +448,8 @@
 TFModelEvaluator::~TFModelEvaluator() {}
 
 void Logger::print(raw_ostream &OS) {
+  tensorflow::SequenceExample SE;
+
   if (RawLogData.empty())
     return;
   if (RawLogData[0].empty())
@@ -488,16 +463,21 @@
       RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
   size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
 
-  OS << "feature_lists: {\n";
+  tensorflow::FeatureLists *FE = SE.mutable_feature_lists();
   for (size_t I = 0; I < FeatureSpecs.size(); ++I)
-    writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
+    writeRawTensorsAsFeatureLists(FE, FeatureSpecs[I], RawLogData[I].data(),
                                   NumberOfRecords);
 
   if (IncludeReward)
-    writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
+    writeRawTensorsAsFeatureLists(FE, {RewardSpec, None},
                                   RawLogData.back().data(), NumberOfRecords,
                                   NumberOfRewards == 1);
-
-  OS << "}\n";
+  std::string OutStr;
+  if (ProtobufTextMode) {
+    google::protobuf::TextFormat::PrintToString(SE, &OutStr);
+  } else {
+    OutStr = SE.SerializeAsString();
+  }
+  OS << OutStr;
 }
 #endif // defined(LLVM_HAVE_TF_API)
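Note, illustrative only: the patch replaces hand-rolled textual protobuf emission with the generated tensorflow::SequenceExample class, serialized in binary by default and as human-readable text under -tfutils-text-log. The standalone sketch below is not part of the patch; it builds a small SequenceExample the same way Logger::print now does and emits it in both modes. The feature name "reward" and the values are made up for illustration.

// Standalone sketch, not part of the patch. Assumes the TensorFlow pip
// package headers are on the include path, as arranged by the CMake change.
#include <iostream>
#include <string>

#include "google/protobuf/text_format.h"
#include "tensorflow/core/example/example.pb.h"

int main() {
  tensorflow::SequenceExample SE;
  tensorflow::FeatureLists *FE = SE.mutable_feature_lists();

  // One FeatureList keyed by the logging name, one Feature per logged step,
  // mirroring writeRawTensorsAsFeatureLists. "reward" is a made-up name.
  tensorflow::FeatureList &FL = (*FE->mutable_feature_list())["reward"];
  for (float R : {0.5f, 1.25f})
    FL.add_feature()->mutable_float_list()->add_value(R);

  // Text mode: what -tfutils-text-log produces; human-readable.
  std::string Text;
  google::protobuf::TextFormat::PrintToString(SE, &Text);
  std::cout << Text;

  // Binary mode: the new default; compact but not human-readable.
  std::cout << "binary size: " << SE.SerializeAsString().size() << " bytes\n";
  return 0;
}

The binary form keeps the logs compact and cheap to parse when training at scale; the text mode mainly serves debugging and tests.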