diff --git a/llvm/include/llvm/Analysis/EmitCModelRegistry.h b/llvm/include/llvm/Analysis/EmitCModelRegistry.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/EmitCModelRegistry.h
@@ -0,0 +1,75 @@
+//===- EmitCModelRegistry.h ---- Registry for EmitC models -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a registry for EmitC-generated models. Generated models
+// register themselves here, and optimization passes can then look each model
+// up by its generated name string. This separates the concerns of people who
+// want to integrate new models into existing ML optimization passes (ML
+// inlining at -Oz, for example) from those of people who want to expose new
+// passes to ML.
+//
+// In the normal case, an EmitC model is selected via a command line flag whose
+// string value is passed to the registry as the lookup key.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_EMITCMODELREGISTRY_H
+#define LLVM_ANALYSIS_EMITCMODELREGISTRY_H
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+namespace llvm {
+
+// Meyers singleton representing the registry. There is one instance of the
+// registry for each ModelT type, which represents the interface for a
+// particular model (inlining, regalloc, etc).
+template <typename ModelT> class EmitCModelRegistry {
+public:
+  static EmitCModelRegistry &get() {
+    static EmitCModelRegistry Registry;
+    return Registry;
+  }
+
+  void addModel(std::unique_ptr<ModelT> Model) {
+    Models[Model->name()] = std::move(Model);
+  }
+
+  // It is up to the consumer to handle the case where nullptr is returned.
+  ModelT *getModel(const std::string &Name) {
+    auto itr = Models.find(Name);
+    if (itr == std::end(Models)) {
+      return nullptr;
+    }
+    return itr->second.get();
+  }
+
+private:
+  EmitCModelRegistry() {}
+
+  std::unordered_map<std::string, std::unique_ptr<ModelT>> Models;
+};
+
+// Helper class whose constructor performs a model registration. Constructing
+// an object of this type is all you need to do to register the model.
+template <typename ModelT> class EmitCModelRegistrationHandle {
+public:
+  EmitCModelRegistrationHandle(std::unique_ptr<ModelT> Model) {
+    EmitCModelRegistry<ModelT>::get().addModel(std::move(Model));
+  }
+};
+} // namespace llvm
+
+// Macro which simplifies registering models with the registry.
+#define REGISTER_EMITC_MODEL(BaseModelType, LocalModelType)                   \
+  namespace {                                                                 \
+  llvm::EmitCModelRegistrationHandle<BaseModelType> _handle_##LocalModelType( \
+      std::make_unique<LocalModelType>());                                    \
+  }
+
+#endif
diff --git a/llvm/include/llvm/Analysis/EmitCTensor.h b/llvm/include/llvm/Analysis/EmitCTensor.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/EmitCTensor.h
@@ -0,0 +1,217 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +// SPDX-License-Identifier: Apache-2.0 + +// This code was originally sourced from github.com/iml130/mlir-emitc and has +// been modified to fit the needs of generated C++ models in LLVM. + +#ifndef LLVM_ANALYSIS_EMITCTENSOR_H +#define LLVM_ANALYSIS_EMITCTENSOR_H + +#include +#include +#include +#include +#include +#include +#include + +// Note: it is critical to keep the llvm prefix of this namespace as the +// autogenerated emitc files include an embedded runtime which duplicates +// _some_ of this code, and this namespace prefix prevents any ODR violations +namespace llvm::emitc { + +namespace utility { +template static constexpr size_t size() { + constexpr std::array s = {Shape...}; + + size_t result = 1; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result *= s[i]; + } + return result; +} + +template +static constexpr std::array strides() { + std::array result = {}; + constexpr std::array s = {Shape...}; + + if (sizeof...(Shape) == 0) { + return result; + } + + result[sizeof...(Shape) - 1] = 1; + + for (size_t i = sizeof...(Shape) - 1; i > 0; i--) { + result[i - 1] = result[i] * s[i]; + } + + return result; +} + +template +constexpr size_t ravel_index(std::array indices) { + std::array shape = {Shape...}; + + for (size_t i = 0; i < sizeof...(Shape); ++i) { + assert(indices[i] < shape[i]); + } + + std::array s = strides(); + + size_t result = 0; + for (size_t i = 0; i < indices.size(); ++i) { + result += indices[i] * s[i]; + } + + return result; +} + +template +constexpr size_t ravel_index(Indices... indices) { + static_assert(sizeof...(Indices) == sizeof...(Shape), + "Incorrect number of arguments"); + return ravel_index({static_cast(indices)...}); +} + +template +constexpr std::array unravel_index(size_t index) { + assert(index < size()); + + std::array s = strides(); + + std::array result = {}; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result[i] = index / s[i]; + index = index % s[i]; + } + + return result; +} + +template struct conjunction : std::true_type {}; +template struct conjunction : B1 {}; +template +struct conjunction + : std::conditional_t, B1> {}; + +template constexpr bool conjunction_v = conjunction::value; +} // namespace utility + +template class Tensor { +public: + using value_type = T; + using reference = typename std::vector::reference; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + + Tensor() : data(size()) {} + + Tensor(std::initializer_list data) : data(data) { + assert(data.size() == size()); + } + + Tensor(std::vector data) : data(std::move(data)) { + assert(data.size() == size()); + } + + T *get() { return data.data(); } + + static constexpr size_t dim(size_t index) { + assert(0 <= index && index < rank()); + constexpr std::array s = {Shape...}; + return s[index]; + } + + static constexpr size_t rank() { return sizeof...(Shape); } + + static constexpr std::array shape() { return {Shape...}; } + + static constexpr size_t size() { return utility::size(); } + + static constexpr std::array strides() { + return utility::strides(); + } + + std::vector> + window(std::array index, std::array sizes) { + std::vector> iotas; + for (auto &size : sizes) { + std::vector range(size); + std::iota(range.begin(), range.end(), 0); + iotas.push_back(range); + } + + std::vector> result; + + int resultSize = + std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies{}); + for (int n = 0; n < resultSize; ++n) { + std::array u = {}; + div_t q{n, 0}; + for (int i = iotas.size() - 1; 0 <= 
i; --i) { + q = div(q.quot, iotas[i].size()); + u[i] = iotas[i][q.rem]; + } + + for (size_t i = 0; i < index.size(); ++i) { + u[i] += index[i]; + } + result.push_back(u); + } + + return result; + } + + iterator begin() { return data.begin(); } + + const_iterator begin() const { return data.begin(); } + + iterator end() { return data.end(); } + + const_iterator end() const { return data.end(); } + + // Index into the flat data buffer. + reference operator[](size_t index) { + assert(0 <= index && index < size()); + return data[index]; + } + + template ...>>> + reference operator()(Indices... indices) { + static_assert(sizeof...(Indices) == rank(), + "Incorrect number of arguments"); + size_t index = ravel_index({static_cast(indices)...}); + + assert(index < size()); + return data[index]; + } + + constexpr size_t ravel_index(std::array indices) { + return utility::ravel_index(indices); + } + + constexpr std::array unravel_index(size_t index) { + return utility::unravel_index(index); + } + +private: + std::vector data; +}; + +} // namespace llvm::emitc + +#endif diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -41,7 +41,7 @@ /// training. /// /// - Dynamically load an advisor via a plugin (PluginInlineAdvisorAnalysis) -enum class InliningAdvisorMode : int { Default, Release, Development }; +enum class InliningAdvisorMode : int { Default, Release, Development, EmitC }; // Each entry represents an inline driver. enum class InlinePass : int { @@ -356,6 +356,9 @@ LazyCallGraph &CG, CGSCCUpdateResult &UR); }; +std::unique_ptr +getEmitCModeAdvisor(Module &M, ModuleAnalysisManager &MAM); + std::unique_ptr getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM); diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -22,34 +22,34 @@ // inline cost, and we define them separately to preserve the original heuristic // behavior. 
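+// Each entry below is M(<enum name>, <feature identifier>, <doc string>). The
+// feature identifier is now a bare token rather than a quoted string so that
+// macros such as DEFINE_SETTER in MLInlineEmitCModel.h can token-paste it into
+// generated setter names (set_<identifier>).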
#define INLINE_COST_FEATURE_ITERATOR(M) \ - M(SROASavings, "sroa_savings") \ - M(SROALosses, "sroa_losses") \ - M(LoadElimination, "load_elimination") \ - M(CallPenalty, "call_penalty") \ - M(CallArgumentSetup, "call_argument_setup") \ - M(LoadRelativeIntrinsic, "load_relative_intrinsic") \ - M(LoweredCallArgSetup, "lowered_call_arg_setup") \ - M(IndirectCallPenalty, "indirect_call_penalty") \ - M(JumpTablePenalty, "jump_table_penalty") \ - M(CaseClusterPenalty, "case_cluster_penalty") \ - M(SwitchPenalty, "switch_penalty") \ - M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \ - M(NumLoops, "num_loops") \ - M(DeadBlocks, "dead_blocks") \ - M(SimplifiedInstructions, "simplified_instructions") \ - M(ConstantArgs, "constant_args") \ - M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \ - M(CallSiteCost, "callsite_cost") \ - M(ColdCcPenalty, "cold_cc_penalty") \ - M(LastCallToStaticBonus, "last_call_to_static_bonus") \ - M(IsMultipleBlocks, "is_multiple_blocks") \ - M(NestedInlines, "nested_inlines") \ - M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \ - M(Threshold, "threshold") + M(SROASavings, sroa_savings, "") \ + M(SROALosses, sroa_losses, "") \ + M(LoadElimination, load_elimination, "") \ + M(CallPenalty, call_penalty, "") \ + M(CallArgumentSetup, call_argument_setup, "") \ + M(LoadRelativeIntrinsic, load_relative_intrinsic, "") \ + M(LoweredCallArgSetup, lowered_call_arg_setup, "") \ + M(IndirectCallPenalty, indirect_call_penalty, "") \ + M(JumpTablePenalty, jump_table_penalty, "") \ + M(CaseClusterPenalty, case_cluster_penalty, "") \ + M(SwitchPenalty, switch_penalty, "") \ + M(UnsimplifiedCommonInstructions, unsimplified_common_instructions, "") \ + M(NumLoops, num_loops, "") \ + M(DeadBlocks, dead_blocks, "") \ + M(SimplifiedInstructions, simplified_instructions, "") \ + M(ConstantArgs, constant_args, "") \ + M(ConstantOffsetPtrArgs, constant_offset_ptr_args, "") \ + M(CallSiteCost, callsite_cost, "") \ + M(ColdCcPenalty, cold_cc_penalty, "") \ + M(LastCallToStaticBonus, last_call_to_static_bonus, "") \ + M(IsMultipleBlocks, is_multiple_blocks, "") \ + M(NestedInlines, nested_inlines, "") \ + M(NestedInlineCostEstimate, nested_inline_cost_estimate, "") \ + M(Threshold, threshold, "") // clang-format off enum class InlineCostFeatureIndex : size_t { -#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, +#define POPULATE_INDICES(INDEX_NAME, NAME, DOC) INDEX_NAME, INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES @@ -81,39 +81,37 @@ // programmatically, and serves as workaround to inability of inserting comments // in macros. 
#define INLINE_FEATURE_ITERATOR(M) \ - M(CalleeBasicBlockCount, "callee_basic_block_count", \ + M(CalleeBasicBlockCount, callee_basic_block_count, \ "number of basic blocks of the callee") \ - M(CallSiteHeight, "callsite_height", \ + M(CallSiteHeight, callsite_height, \ "position of the call site in the original call graph - measured from " \ "the farthest SCC") \ - M(NodeCount, "node_count", \ + M(NodeCount, node_count, \ "total current number of defined functions in the module") \ - M(NrCtantParams, "nr_ctant_params", \ + M(NrCtantParams, nr_ctant_params, \ "number of parameters in the call site that are constants") \ - M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \ - M(EdgeCount, "edge_count", "total number of calls in the module") \ - M(CallerUsers, "caller_users", \ + M(CostEstimate, cost_estimate, "total cost estimate (threshold - free)") \ + M(EdgeCount, edge_count, "total number of calls in the module") \ + M(CallerUsers, caller_users, \ "number of module-internal users of the caller, +1 if the caller is " \ "exposed externally") \ - M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \ + M(CallerConditionallyExecutedBlocks, caller_conditionally_executed_blocks, \ "number of blocks reached from a conditional instruction, in the caller") \ - M(CallerBasicBlockCount, "caller_basic_block_count", \ + M(CallerBasicBlockCount, caller_basic_block_count, \ "number of basic blocks in the caller") \ - M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \ + M(CalleeConditionallyExecutedBlocks, callee_conditionally_executed_blocks, \ "number of blocks reached from a conditional instruction, in the callee") \ - M(CalleeUsers, "callee_users", \ + M(CalleeUsers, callee_users, \ "number of module-internal users of the callee, +1 if the callee is " \ "exposed externally") // clang-format off enum class FeatureIndex : size_t { +#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, // InlineCost features - these must come first -#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) -#undef POPULATE_INDICES // Non-cost features -#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, INLINE_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES diff --git a/llvm/include/llvm/Analysis/MLInlineEmitCModel.h b/llvm/include/llvm/Analysis/MLInlineEmitCModel.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/MLInlineEmitCModel.h @@ -0,0 +1,49 @@ +//===- MLInlineEmitCModel.h -- Model for inlining EmitC Models --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_ANALYSIS_MLINLINEEMITCMODEL_H +#define LLVM_ANALYSIS_MLINLINEEMITCMODEL_H + +#include "llvm/Analysis/EmitCTensor.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" + +#include + +namespace llvm { + +// This is the base class for all EmitC-generated models for the inlining -Oz +// problem. 
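+// Generated model implementations override one setter per feature (declared
+// via the DEFINE_SETTER macro below) together with name() and run(), and they
+// make themselves discoverable by registering with EmitCModelRegistry through
+// the REGISTER_EMITC_MODEL macro.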
+class MLInlineOzEmitCModel {
+public:
+  // Define a setter method for each input field.
+#define DEFINE_SETTER(cpp_name, py_name, _)                                    \
+  virtual void set_##py_name(emitc::Tensor<int64_t, 1>) = 0;
+
+  INLINE_FEATURE_ITERATOR(DEFINE_SETTER);
+  INLINE_COST_FEATURE_ITERATOR(DEFINE_SETTER);
+#undef DEFINE_SETTER
+
+  // These setters represent fields in every EmitC-generated model. We include
+  // them here for completeness, but they are not pure-virtual because they are
+  // not strictly necessary.
+  virtual void set_inlining_default(emitc::Tensor<int64_t, 1> x) {}
+  virtual void set_step_type(emitc::Tensor<int32_t, 1> x) {}
+  virtual void set_discount(emitc::Tensor<float, 1> x) {}
+  virtual void set_reward(emitc::Tensor<float, 1> x) {}
+
+  // Name of the model: this is used when inserting models into the registry.
+  virtual std::string name() = 0;
+
+  // Run the model.
+  virtual emitc::Tensor<int64_t, 1> run() = 0;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h
--- a/llvm/include/llvm/Analysis/MLModelRunner.h
+++ b/llvm/include/llvm/Analysis/MLModelRunner.h
@@ -47,7 +47,14 @@
     return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
   }
 
-  enum class Kind : int { Unknown, Release, Development, NoOp, Interactive };
+  enum class Kind : int {
+    Unknown,
+    Release,
+    Development,
+    NoOp,
+    Interactive,
+    EmitC
+  };
   Kind getKind() const { return Type; }
 
   virtual void switchContext(StringRef Name) {}
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -147,6 +147,10 @@
   VFABIDemangling.cpp
   ${GeneratedMLSources}
 
+  # Start EmitC-generated files
+  models/emitc/InlineOzTestModel.emitc.cpp
+  # End EmitC-generated files
+
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -234,6 +234,9 @@
     LLVM_DEBUG(dbgs() << "Using release-mode inliner policy.\n");
     Advisor = llvm::getReleaseModeAdvisor(M, MAM);
     break;
+  case InliningAdvisorMode::EmitC:
+    LLVM_DEBUG(dbgs() << "Using EmitC-compiled policy.\n");
+    Advisor = llvm::getEmitCModeAdvisor(M, MAM);
   }
 
   return !!Advisor;
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/EmitCModelRegistry.h"
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InlineModelFeatureMaps.h"
@@ -29,9 +30,19 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
+#include "MLInlinerEmitCRunner.h"
+
+
+// Start EmitC model registration
+#include "models/emitc/InlineOzTestModel.emitc.h"
+REGISTER_EMITC_MODEL(::llvm::MLInlineOzEmitCModel, InlineOzTestModel);
+// End EmitC model registration
 
 using namespace llvm;
 
+static cl::opt<std::string> MLInlineEmitCModelName(
+    "inliner-emitc-model-name", cl::Hidden, cl::desc("Name of the model to use for the ML inlining advisor."));
+
 static cl::opt<std::string> InteractiveChannelBaseName(
     "inliner-interactive-channel-base", cl::Hidden,
     cl::desc(
@@ -47,6 +58,12 @@
 using CompiledModelType = NoopSavedModelImpl;
 #endif
 
+std::unique_ptr<InlineAdvisor>
+llvm::getEmitCModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
+  auto Runner = std::make_unique<EmitCModelRunner>(M.getContext(), FeatureMap, InlineDecisionSpec, MLInlineEmitCModelName);
+  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(Runner));
+}
+
 std::unique_ptr<InlineAdvisor>
 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
   if (!llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() &&
@@ -80,13 +97,11 @@
 // clang-format off
 const std::vector<TensorSpec> llvm::FeatureMap{
-#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
+#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(#NAME, {1} ),
 // InlineCost features - these must come first
   INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
-#undef POPULATE_NAMES
 
 // Non-cost features
-#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
   INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
 #undef POPULATE_NAMES
 };
diff --git a/llvm/lib/Analysis/MLInlinerEmitCRunner.h b/llvm/lib/Analysis/MLInlinerEmitCRunner.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/MLInlinerEmitCRunner.h
@@ -0,0 +1,86 @@
+//===- MLInlinerEmitCRunner.h ---- EmitC ML model runner -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_EMITCMODELRUNNER_H
+#define LLVM_ANALYSIS_EMITCMODELRUNNER_H
+
+#include "llvm/Analysis/EmitCModelRegistry.h"
+#include "llvm/Analysis/EmitCTensor.h"
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
+#include "llvm/Analysis/MLInlineEmitCModel.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+
+namespace llvm {
+
+// Temporary convenience function to convert between raw memory buffers and
+// emitc::Tensor types. This process can be optimized, but for now it is an
+// easy solution.
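+// For example, an int64_t inlining feature described by a TensorSpec of shape
+// {1} would be copied into an emitc::Tensor<int64_t, 1> before being handed to
+// the generated model's setter.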
+template +emitc::Tensor convertBufferToEmitCTensor(void *Buffer, + TensorSpec Spec) { + std::vector Data; + T *TypedBuffer = static_cast(Buffer); + std::copy(TypedBuffer, TypedBuffer + Spec.getElementCount(), + std::back_inserter(Data)); + return emitc::Tensor(std::move(Data)); +} + +class EmitCModelRunner : public MLModelRunner { +public: + EmitCModelRunner(LLVMContext &Ctx, const std::vector &Inputs, + const TensorSpec &Advice, std::string ModelName) + : MLModelRunner(Ctx, MLModelRunner::Kind::EmitC, Inputs.size()), + InputSpecs(Inputs), OutputSpec(Advice) { + for (size_t I = 0; I < Inputs.size(); ++I) { + setUpBufferForTensor(I, Inputs[I], nullptr); + } + + // Look up required model from the registry + Model = EmitCModelRegistry::get().getModel(ModelName); + if (!Model) { + Ctx.emitError("The requested model [" + ModelName + + "] was not registered!"); + } + } + + static bool classof(const MLModelRunner *R) { + return R->getKind() == MLModelRunner::Kind::EmitC; + } + + virtual ~EmitCModelRunner() {} + + void *evaluateUntyped() override { +#define FEATURE_SETTER(cpp_name, py_name, _) \ + do { \ + size_t idx = static_cast(FeatureIndex::cpp_name); \ + Model->set_##py_name(convertBufferToEmitCTensor( \ + getTensorUntyped(idx), InputSpecs[idx])); \ + } while (false); + + INLINE_FEATURE_ITERATOR(FEATURE_SETTER); + INLINE_COST_FEATURE_ITERATOR(FEATURE_SETTER); + +#undef FEATURE_SETTER + Result = Model->run(); + return static_cast(Result.get()); + } + +private: + const std::vector InputSpecs; + const TensorSpec OutputSpec; + + emitc::Tensor Result; + + MLInlineOzEmitCModel *Model; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.h @@ -0,0 +1,100 @@ +#include "llvm/Analysis/EmitCTensor.h" +#include "llvm/Analysis/MLInlineEmitCModel.h" +namespace llvm::emitc::generated { +class InlineOzTestModel : public ::llvm::MLInlineOzEmitCModel { +private: + ::llvm::emitc::Tensor v1; + ::llvm::emitc::Tensor v2; + ::llvm::emitc::Tensor v3; + ::llvm::emitc::Tensor v4; + ::llvm::emitc::Tensor v5; + ::llvm::emitc::Tensor v6; + ::llvm::emitc::Tensor v7; + ::llvm::emitc::Tensor v8; + ::llvm::emitc::Tensor v9; + ::llvm::emitc::Tensor v10; + ::llvm::emitc::Tensor v11; + ::llvm::emitc::Tensor v12; + ::llvm::emitc::Tensor v13; + ::llvm::emitc::Tensor v14; + ::llvm::emitc::Tensor v15; + ::llvm::emitc::Tensor v16; + ::llvm::emitc::Tensor v17; + ::llvm::emitc::Tensor v18; + ::llvm::emitc::Tensor v19; + ::llvm::emitc::Tensor v20; + ::llvm::emitc::Tensor v21; + ::llvm::emitc::Tensor v22; + ::llvm::emitc::Tensor v23; + ::llvm::emitc::Tensor v24; + ::llvm::emitc::Tensor v25; + ::llvm::emitc::Tensor v26; + ::llvm::emitc::Tensor v27; + ::llvm::emitc::Tensor v28; + ::llvm::emitc::Tensor v29; + ::llvm::emitc::Tensor v30; + ::llvm::emitc::Tensor v31; + ::llvm::emitc::Tensor v32; + ::llvm::emitc::Tensor v33; + ::llvm::emitc::Tensor v34; + ::llvm::emitc::Tensor v35; + ::llvm::emitc::Tensor v36; + ::llvm::emitc::Tensor v37; + ::llvm::emitc::Tensor v38; + ::llvm::emitc::Tensor v39; + +public: + void set_callsite_cost(::llvm::emitc::Tensor x) override; + void set_is_multiple_blocks(::llvm::emitc::Tensor x) override; + void set_caller_conditionally_executed_blocks( + ::llvm::emitc::Tensor x) override; + void set_inlining_default(::llvm::emitc::Tensor x) override; + void set_cold_cc_penalty(::llvm::emitc::Tensor x) override; + 
void set_callee_conditionally_executed_blocks( + ::llvm::emitc::Tensor x) override; + void set_callee_users(::llvm::emitc::Tensor x) override; + void + set_callee_basic_block_count(::llvm::emitc::Tensor x) override; + void set_nr_ctant_params(::llvm::emitc::Tensor x) override; + void + set_load_relative_intrinsic(::llvm::emitc::Tensor x) override; + void set_jump_table_penalty(::llvm::emitc::Tensor x) override; + void set_unsimplified_common_instructions( + ::llvm::emitc::Tensor x) override; + void set_indirect_call_penalty(::llvm::emitc::Tensor x) override; + void set_load_elimination(::llvm::emitc::Tensor x) override; + void set_call_penalty(::llvm::emitc::Tensor x) override; + void set_cost_estimate(::llvm::emitc::Tensor x) override; + void set_case_cluster_penalty(::llvm::emitc::Tensor x) override; + void set_node_count(::llvm::emitc::Tensor x) override; + void set_call_argument_setup(::llvm::emitc::Tensor x) override; + void set_sroa_savings(::llvm::emitc::Tensor x) override; + void set_lowered_call_arg_setup(::llvm::emitc::Tensor x) override; + void set_threshold(::llvm::emitc::Tensor x) override; + void set_dead_blocks(::llvm::emitc::Tensor x) override; + void set_constant_args(::llvm::emitc::Tensor x) override; + void set_sroa_losses(::llvm::emitc::Tensor x) override; + void + set_simplified_instructions(::llvm::emitc::Tensor x) override; + void set_num_loops(::llvm::emitc::Tensor x) override; + void set_step_type(::llvm::emitc::Tensor x) override; + void set_edge_count(::llvm::emitc::Tensor x) override; + void set_nested_inlines(::llvm::emitc::Tensor x) override; + void + set_caller_basic_block_count(::llvm::emitc::Tensor x) override; + void + set_last_call_to_static_bonus(::llvm::emitc::Tensor x) override; + void + set_nested_inline_cost_estimate(::llvm::emitc::Tensor x) override; + void set_callsite_height(::llvm::emitc::Tensor x) override; + void + set_constant_offset_ptr_args(::llvm::emitc::Tensor x) override; + void set_switch_penalty(::llvm::emitc::Tensor x) override; + void set_discount(::llvm::emitc::Tensor x) override; + void set_caller_users(::llvm::emitc::Tensor x) override; + void set_reward(::llvm::emitc::Tensor x) override; + std::string name() override { return "InlineOzTestModel"; } + ::llvm::emitc::Tensor run() override; +}; + +} // namespace llvm::emitc::generated diff --git a/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/models/emitc/InlineOzTestModel.emitc.cpp @@ -0,0 +1,1454 @@ +#include "InlineOzTestModel.emitc.h" +#include "llvm/Analysis/EmitCTensor.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace { +namespace emitc { +namespace utility { +template static constexpr size_t size() { + constexpr std::array s = {Shape...}; + size_t result = 1; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result *= s[i]; + } + return result; +} +template +static constexpr std::array strides() { + std::array result = {}; + constexpr std::array s = {Shape...}; + if (sizeof...(Shape) == 0) { + return result; + } + result[sizeof...(Shape) - 1] = 1; + for (size_t i = sizeof...(Shape) - 1; i > 0; i--) { + result[i - 1] = result[i] * s[i]; + } + return result; +} +template +constexpr size_t ravel_index(std::array indices) { + std::array shape = {Shape...}; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + assert(indices[i] < shape[i]); + } + std::array s 
= strides(); + size_t result = 0; + for (size_t i = 0; i < indices.size(); ++i) { + result += indices[i] * s[i]; + } + return result; +} +template +constexpr size_t ravel_index(Indices... indices) { + static_assert(sizeof...(Indices) == sizeof...(Shape), + "Incorrect number of arguments"); + return ravel_index({static_cast(indices)...}); +} +template +constexpr std::array unravel_index(size_t index) { + assert(index < size()); + std::array s = strides(); + std::array result = {}; + for (size_t i = 0; i < sizeof...(Shape); ++i) { + result[i] = index / s[i]; + index = index % s[i]; + } + return result; +} +} // namespace utility +} // namespace emitc +namespace detail { +template constexpr size_t sum(const std::array arr) { + size_t result = 0; + for (size_t i = 0; i < arr.size(); ++i) { + result += arr[i]; + } + return result; +} +template constexpr size_t first(const std::array arr) { + static_assert(N > 0, "Cannot get the first element of an empty array"); + return arr[0]; +} +template constexpr bool all_same(const std::array arr) { + if (arr.size() == 0) { + return true; + } + size_t first = arr[0]; + for (size_t i = 1; i < arr.size(); ++i) { + if (arr[i] != first) { + return false; + } + } + return true; +} +template struct conjunction : std::true_type {}; +template struct conjunction : B1 {}; +template +struct conjunction + : std::conditional_t, B1> {}; +template constexpr bool conjunction_v = conjunction::value; +template struct case_t { + static constexpr bool value = B; + using type = T; +}; +template +struct switch_t : std::conditional_t> {}; +template struct switch_t { + using type = T; +}; +template struct switch_t> { + static_assert(B, "None of the supplied conditions evaluate to true."); + using type = T; +}; +} // namespace detail +template class _UnusedTensorType { +public: + using value_type = T; + using reference = typename std::vector::reference; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + _UnusedTensorType() : data(size()) {} + _UnusedTensorType(std::initializer_list data) : data(data) { + assert(data.size() == size()); + } + _UnusedTensorType(std::vector data) : data(std::move(data)) {} + static constexpr size_t dim(size_t index) { + assert(0 <= index && index < rank()); + constexpr std::array s = {Shape...}; + return s[index]; + } + static constexpr size_t rank() { return sizeof...(Shape); } + static constexpr std::array shape() { return {Shape...}; } + static constexpr size_t size() { return ::emitc::utility::size(); } + static constexpr std::array strides() { + return ::emitc::utility::strides(); + } + T *get() { return data.data(); } + std::vector> + window(std::array index, std::array sizes) { + std::vector> iotas; + for (auto &size : sizes) { + std::vector range(size); + std::iota(range.begin(), range.end(), 0); + iotas.push_back(range); + } + std::vector> result; + int resultSize = + std::accumulate(sizes.begin(), sizes.end(), 1, std::multiplies{}); + for (int n = 0; n < resultSize; ++n) { + std::array u = {}; + div_t q{n, 0}; + for (int i = iotas.size() - 1; 0 <= i; --i) { + q = div(q.quot, iotas[i].size()); + u[i] = iotas[i][q.rem]; + } + for (size_t i = 0; i < index.size(); ++i) { + u[i] += index[i]; + } + result.push_back(u); + } + return result; + } + iterator begin() { return data.begin(); } + const_iterator begin() const { return data.begin(); } + iterator end() { return data.end(); } + const_iterator end() const { return data.end(); } + reference operator[](size_t index) { + assert(0 
<= index && index < size()); + return data[index]; + } + template ...>>> + reference operator()(Indices... indices) { + static_assert(sizeof...(Indices) == rank(), + "Incorrect number of arguments"); + size_t index = ravel_index({static_cast(indices)...}); + assert(index < size()); + return data[index]; + } + constexpr size_t ravel_index(std::array indices) { + return ::emitc::utility::ravel_index(indices); + } + constexpr std::array unravel_index(size_t index) { + return ::emitc::utility::unravel_index(index); + } + +private: + std::vector data; +}; +template using Tensor0D = ::llvm::emitc::Tensor; +template +using Tensor1D = ::llvm::emitc::Tensor; +template +using Tensor2D = ::llvm::emitc::Tensor; +template +using Tensor3D = ::llvm::emitc::Tensor; +template +using Tensor4D = ::llvm::emitc::Tensor; +template using is_scalar = std::is_arithmetic; +template +struct is_tensor : std::false_type {}; +template +struct is_tensor<::llvm::emitc::Tensor> : std::true_type {}; +template +struct is_tensor_of_dim : std::false_type {}; +template +struct is_tensor_of_dim> { + static constexpr bool value = + ::llvm::emitc::Tensor::rank() == Dim; +}; +template +using IsScalar = typename std::enable_if_t::value, bool>; +template +using IsTensor = typename std::enable_if_t::value, bool>; +template +using IsTensorOfDim = + typename std::enable_if_t::value, bool>; +template struct get_element_type { + using type = T; +}; +template +struct get_element_type<::llvm::emitc::Tensor> { + using type = T; +}; +template +using IsTensorOfType = std::enable_if_t< + std::is_same::type, ET>::value, bool>; +template struct replace_element_type { + using type = Dest; +}; +template +struct replace_element_type> { + using type = ::llvm::emitc::Tensor; +}; +template using UnaryFuncType = Dest (*)(Src); +template +using BinaryFuncType = Dest (*)(SrcLeft, SrcRight); +template = true> +inline Dest unary(const Src &x, UnaryOp &&op) { + return op(x); +} +template = true> +inline Dest unary(const Src &x, UnaryOp &&op) { + Dest z; + std::transform(x.begin(), x.end(), z.begin(), op); + return z; +} +template = true, IsScalar = true> +inline Dest binary(const SrcLeft &x, const SrcRight &y, BinaryOp &&op) { + return op(x, y); +} +template = true, IsTensor = true> +inline Dest binary(const SrcLeft &x, const SrcRight &y, BinaryOp &&op) { + Dest z; + std::transform(x.begin(), x.end(), y.begin(), z.begin(), op); + return z; +} +template = true, IsScalar = true, + IsScalar = true> +inline Dest ternary(const SrcA &a, const SrcB &b, const SrcB &c, + TernaryOp &&op) { + return op(a, b, c); +} +template = true, IsTensor = true, + IsTensor = true> +inline Dest ternary(const SrcA &a, const SrcB &b, const SrcB &c, + TernaryOp &&op) { + Dest d; + auto first1 = a.begin(), last1 = a.end(); + auto first2 = b.begin(), first3 = c.begin(); + auto result = d.begin(); + while (first1 != last1) { + *result = op(*first1, *first2, *first3); + ++result; + ++first1; + ++first2; + ++first3; + } + return d; +} +template struct concat {}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 1, "Dimension index out of bounds"); + using type = Tensor1D({Xs...})>; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 2, "Dimension index out of bounds"); + static_assert((Dim == 0 && detail::all_same({Ys...})) || + (Dim == 1 && detail::all_same({Xs...})), + "All dimensions except for the dimension index must match"); + using type = typename std::conditional_t< + Dim == 0, + Tensor2D({Xs...}), + detail::first({Ys...})>, + Tensor2D({Xs...}), + 
detail::sum({Ys...})>>; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 3, "Dimension index out of bounds"); + using type = typename detail::switch_t< + detail::case_t({Xs...}), + detail::first({Ys...}), + detail::first({Zs...})>>, + detail::case_t({Xs...}), + detail::sum({Ys...}), + detail::first({Zs...})>>, + detail::case_t({Xs...}), + detail::first({Ys...}), + detail::sum({Zs...})>>>::type; +}; +template +struct concat...> { + static_assert(0 <= Dim && Dim < 4, "Dimension index out of bounds"); + using type = typename detail::switch_t< + detail::case_t({D0...}), + detail::first({D1...}), + detail::first({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::sum({D1...}), + detail::first({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::first({D1...}), + detail::sum({D2...}), + detail::first({D3...})>>, + detail::case_t({D0...}), + detail::first({D1...}), + detail::first({D2...}), + detail::sum({D3...})>>>::type; +}; +namespace emitc { +template inline Src abs(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::abs); + return unary(x, f); +} +template inline Src ceil(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::ceil); + return unary(x, f); +} +template inline Dest convert(Src x) { + using ET_Dest = typename get_element_type::type; + using ET_Src = typename get_element_type::type; + auto cast = [](ET_Src value) { return static_cast(value); }; + return unary>(x, cast); +} +template inline Src exp(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::exp); + return unary(x, f); +} +template inline Src floor(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::floor); + return unary(x, f); +} +template inline Src log(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::log); + return unary(x, f); +} +template inline Src negate(Src x) { + using ET_Src = typename get_element_type::type; + auto f = std::negate{}; + return unary(x, f); +} +template +inline Src clamp(Min min, Src operand, Max max) { + static_assert( + std::is_same::value || + (is_tensor_of_dim<0, Min>::value && + std::is_same::type, + typename get_element_type::type>::value), + "Expected the same type for min and operand or a 0-dim tensor of the " + "same element type for min"); + static_assert( + std::is_same::value || + (is_tensor_of_dim<0, Max>::value && + std::is_same::type, + typename get_element_type::type>::value), + "Expected the same type for min and operand or a 0-dim tensor of the " + "same element type for max"); + const bool broadcast_min = !std::is_same::value; + const bool broadcast_max = !std::is_same::value; + Src result; + for (size_t index = 0; index < Src::size(); index++) { + const auto value_min = broadcast_min ? min[0] : min[index]; + const auto value_max = broadcast_max ? max[0] : max[index]; + auto value = operand[index]; + value = value < value_min ? value_min : value; + value = value > value_max ? 
value_max : value; + result[index] = value; + } + return result; +} +template inline Src sqrt(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::sqrt); + return unary(x, f); +} +template inline Src tanh(Src x) { + using ET_Src = typename get_element_type::type; + auto f = static_cast(std::tanh); + return unary(x, f); +} +template inline Src add(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::plus{}; + return binary(x, y, f); +} +template inline Src max(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::max); + return binary(x, y, f); +} +template inline Src min(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::min); + return binary(x, y, f); +} +template inline Src mul(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::multiplies{}; + return binary(x, y, f); +} +template inline Src pow(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src a, ET_Src b) -> ET_Src { + if (std::is_integral::value) { + const bool negative = b < 0; + if (b < 0) { + b = -b; + } + ET_Src result = 1; + for (ET_Src i = 0; i < b; i++) { + result *= a; + } + if (negative) { + result = 1 / result; + } + return result; + } else { + return std::pow(a, b); + } + }; + return binary(x, y, f); +} +template inline Src sub(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = std::minus{}; + return binary(x, y, f); +} +template +inline Dest broadcast_in_dim( + Src operand, + ::llvm::emitc::Tensor broadcast_dimensions) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + std::vector retainedDimensions(Dest::rank()); + std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0); + retainedDimensions.erase( + std::remove_if(retainedDimensions.begin(), retainedDimensions.end(), + [&broadcast_dimensions](size_t i) { + return std::find(broadcast_dimensions.begin(), + broadcast_dimensions.end(), + i) == broadcast_dimensions.end(); + }), + retainedDimensions.end()); + assert(retainedDimensions.size() == Src::rank()); + Dest result; + for (size_t i = 0; i < result.size(); i++) { + auto dest_index = result.unravel_index(i); + std::array src_index; + for (size_t j = 0; j < src_index.size(); j++) { + src_index[j] = dest_index[broadcast_dimensions(j)]; + } + for (size_t i = 0; i < src_index.size(); ++i) { + if (Src::shape()[i] == 1) { + src_index[i] = 0; + } + } + result[i] = operand[operand.ravel_index(src_index)]; + } + return result; +} +template +Dest dot(Lhs lhs, Rhs rhs) { + static_assert(is_tensor_of_dim<2, Lhs>::value, "Expected 2 dimensional lhs"); + static_assert(is_tensor_of_dim<2, Rhs>::value, "Expected 2 dimensional rhs"); + static_assert(Lhs::dim(1) == Rhs::dim(0), + "Expected contracting dimension to match"); + Dest output; + for (size_t m = 0; m < lhs.dim(0); m++) { + for (size_t n = 0; n < lhs.dim(1); n++) { + for (size_t k = 0; k < rhs.dim(1); k++) { + output(m, k) += lhs(m, n) * rhs(n, k); + } + } + } + return output; +} +template +Dest batch_matmul(Lhs lhs, Rhs rhs) { + static_assert(is_tensor_of_dim<3, Lhs>::value, "Expected 3 dimensional lhs"); + static_assert(is_tensor_of_dim<3, Rhs>::value, "Expected 3 dimensional rhs"); + static_assert(Lhs::dim(0) == Rhs::dim(0) && Lhs::dim(0) == Dest::dim(0), + "Expected batch dimension to match"); + static_assert(Lhs::dim(2) == Rhs::dim(1), + "Expected 
contracting dimension to match"); + static_assert(Dest::dim(1) == Lhs::dim(1), "Expected row dimension to match"); + static_assert(Dest::dim(2) == Rhs::dim(2), + "Expected column dimension to match"); + Dest output; + for (size_t b = 0; b < lhs.dim(0); b++) { + for (size_t m = 0; m < lhs.dim(1); m++) { + for (size_t n = 0; n < lhs.dim(2); n++) { + for (size_t k = 0; k < rhs.dim(2); k++) { + output(b, m, k) += lhs(b, m, n) * rhs(b, n, k); + } + } + } + } + return output; +} +template +inline Dest concatenate(Src input) { + Dest z = input; + return z; +} +template +inline Dest concatenate(Src1 input1, Src... inputs) { + static_assert(sizeof...(inputs) > 0, "Wrong template specialization chosen"); + using ET_Src = typename get_element_type::type; + using Rest = typename concat::type; + Rest rest = concatenate(inputs...); + Dest z; + auto calculate_shift = [](const auto &shape) { + size_t shift = 1; + for (size_t i = Dimension; i < shape.size(); i++) { + shift *= shape[i]; + } + return shift; + }; + auto a_shift = calculate_shift(Src1::shape()); + auto b_shift = calculate_shift(Rest::shape()); + for (auto a_ptr = input1.begin(), b_ptr = rest.begin(), c_ptr = z.begin(); + a_ptr != input1.end(); a_ptr += a_shift, b_ptr += b_shift) { + std::copy(a_ptr, a_ptr + a_shift, c_ptr); + c_ptr += a_shift; + std::copy(b_ptr, b_ptr + b_shift, c_ptr); + c_ptr += b_shift; + } + return z; +} +template inline Dest reshape(Src x) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, "Element type mismatch"); + static_assert(Src::size() == Dest::size(), "Tensor size mismatch"); + Dest z; + std::copy(x.begin(), x.end(), z.begin()); + return z; +} +template = true> +Dest slice(Src x, ::llvm::emitc::Tensor start_indices, + ::llvm::emitc::Tensor limit_indices, + ::llvm::emitc::Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + z[index++] = x(i); + } + return z; +} +template = true> +Dest slice(Src x, ::llvm::emitc::Tensor start_indices, + ::llvm::emitc::Tensor limit_indices, + ::llvm::emitc::Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < limit_indices[1]; j += strides[1]) { + z[index++] = x(i, j); + } + } + return z; +} +template = true> +Dest slice(Src x, ::llvm::emitc::Tensor start_indices, + ::llvm::emitc::Tensor limit_indices, + ::llvm::emitc::Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < limit_indices[1]; j += strides[1]) { + for (int64_t k = start_indices[2]; k < limit_indices[2]; + k += strides[2]) { + z[index++] = x(i, j, k); + } + } + } + return z; +} +template = true> +Dest slice(Src x, ::llvm::emitc::Tensor start_indices, + ::llvm::emitc::Tensor limit_indices, + ::llvm::emitc::Tensor strides) { + Dest z; + size_t index = 0; + for (int64_t i = start_indices[0]; i < limit_indices[0]; i += strides[0]) { + for (int64_t j = start_indices[1]; j < limit_indices[1]; j += strides[1]) { + for (int64_t k = start_indices[2]; k < limit_indices[2]; + k += strides[2]) { + for (int64_t c = start_indices[3]; c < limit_indices[3]; + c += strides[3]) { + z[index++] = x(i, j, k, c); + } + } + } + } + return 
z; +} +template +inline Dest +pad(Src operand, + ::llvm::emitc::Tensor::type> padding_value, + ::llvm::emitc::Tensor edge_padding_low, + ::llvm::emitc::Tensor edge_padding_high, + ::llvm::emitc::Tensor interior_padding) { + assert(std::all_of(interior_padding.begin(), interior_padding.end(), + [](int64_t i) { return i >= 0; })); + assert(std::all_of(edge_padding_low.begin(), edge_padding_low.end(), + [](int64_t i) { return i >= 0; })); + assert(std::all_of(edge_padding_high.begin(), edge_padding_high.end(), + [](int64_t i) { return i >= 0; })); + Dest result; + auto interior = [&interior_padding](std::array index) { + for (size_t i = 0; i < index.size(); i++) { + if (index[i] % (interior_padding[i] + 1) != 0) { + return true; + } + } + return false; + }; + auto out_of_bounds = [](std::array index) { + for (size_t i = 0; i < index.size(); i++) { + if (index[i] < 0 || index[i] >= Src::dim(i)) { + return true; + } + } + return false; + }; + for (size_t i = 0; i < result.size(); i++) { + auto index = result.unravel_index(i); + for (size_t j = 0; j < index.size(); j++) { + index[j] -= edge_padding_low[j]; + } + if (interior(index)) { + result[i] = padding_value(); + } else { + for (size_t j = 0; j < index.size(); j++) { + size_t pad = interior_padding[j]; + assert(index[j] % (pad + 1) == 0); + index[j] /= (pad + 1); + } + if (out_of_bounds(index)) { + result[i] = padding_value(); + } else { + result[i] = operand[operand.ravel_index(index)]; + } + } + } + return result; +} +} // namespace emitc +namespace emitc { +namespace tensor { +template +inline T extract(::llvm::emitc::Tensor x, Indices... indices) { + return x(indices...); +} +template = true> +inline Dest splat(Src x) { + Dest z; + std::fill(z.begin(), z.end(), x); + return z; +} +} // namespace tensor +} // namespace emitc +namespace emitc { +namespace tosa { +template inline Src abs(Src x) { return ::emitc::abs(x); } +template inline Dest cast(Src x) { + return ::emitc::convert(x); +} +template inline Src ceil(Src x) { return ::emitc::ceil(x); } +template +inline Src clamp(Src operand, typename Src::value_type min_value, + typename Src::value_type max_value) { + ::llvm::emitc::Tensor min{min_value}; + ::llvm::emitc::Tensor max{max_value}; + return ::emitc::clamp(min, operand, max); +} +template inline Src clz(Src x) { + using ET_Src = typename get_element_type::type; + static_assert(std::is_same::value, + "Expected tensor of type int32_t"); + auto f = [](ET_Src element) { + ET_Src count = 32; + while (element != 0 && count > 0) { + count--; + element >>= 1; + } + return count; + }; + return unary(x, f); +} +template inline Src exp(Src x) { return ::emitc::exp(x); } +template inline Src floor(Src x) { + return ::emitc::floor(x); +} +template inline Src log(Src x) { return ::emitc::log(x); } +template inline Src negate(Src x) { return ::emitc::negate(x); } +template inline Src reciprocal(Src x) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src element) { return (static_cast(1.0) / element); }; + return unary(x, f); +} +template +inline Dest rescale(Src x, typename get_element_type::type in_zp, + typename get_element_type::type out_zp, + Tensor1D mult, Tensor1D shift, + bool scale32, bool double_round, bool per_channel) { + using ET_Dest = typename get_element_type::type; + using Dest_I32 = typename replace_element_type::type; + assert(!(!scale32 && double_round) && + "Invalid combination of `scale32` and `double_round` arguments."); + auto apply_scale = [=](int64_t element, int64_t mult, int64_t shift) { + int64_t 
round = 1 << (shift - 1); + if (double_round && shift > 31) { + if (element >= 0) + round += 1 << 30; + else + round -= 1 << 30; + } + int64_t result = (element * mult + round) >> shift; + return static_cast(result); + }; + Dest_I32 result; + for (size_t i = 0; i < x.size(); ++i) { + size_t index = per_channel ? x.unravel_index(i)[x.rank() - 1] : 0; + int64_t element = x[i] - in_zp; + int32_t scaled_element = apply_scale(element, mult[index], shift[index]); + result[i] = scaled_element + out_zp; + } + Tensor0D min{ + static_cast(std::numeric_limits::min())}; + Tensor0D max{ + static_cast(std::numeric_limits::max())}; + return cast(::emitc::clamp(min, result, max)); +} +template inline Src tanh(Src x) { return ::emitc::tanh(x); } +template inline Src add(Src x, Src y) { + return ::emitc::add(x, y); +} +template +inline Src arithmetic_right_shift(Src x, Src y, bool round) { + using ET_Src = typename get_element_type::type; + std::function f; + if (round) { + f = [](ET_Src left, ET_Src right) { + ET_Src result = left >> right; + if (right > 0 && ((left >> (right - 1)) & 1) != 0) { + result++; + } + return result; + }; + } else { + f = [](ET_Src left, ET_Src right) { return left >> right; }; + } + return binary(x, y, f); +} +template inline Dest equal(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left == right; }; + return binary(x, y, f); +} +template inline Dest greater_equal(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left >= right; }; + return binary(x, y, f); +} +template inline Src logical_left_shift(Src x, Src y) { + using ET_Src = typename get_element_type::type; + auto f = [](ET_Src left, ET_Src right) { return left << right; }; + return binary(x, y, f); +} +template inline Src mul(Src x, Src y) { + return ::emitc::mul(x, y); +} +template inline Src maximum(Src x, Src y) { + return ::emitc::max(x, y); +} +template inline Src minimum(Src x, Src y) { + return ::emitc::min(x, y); +} +template = true> +inline Src mul(Src x, Src y, const int32_t shift) { + if (shift > 0) { + auto f = [&shift](int32_t x, int32_t y) -> int32_t { + int64_t result; + int64_t round = 1L << (shift - 1); + result = x * y + round; + result = result >> shift; + return static_cast(result); + }; + return binary(x, y, f); + } else { + return ::emitc::mul(x, y); + } +} +template inline Src pow(Src x, Src y) { + return ::emitc::pow(x, y); +} +template inline Src sub(Src x, Src y) { + return ::emitc::sub(x, y); +} +template +inline ::llvm::emitc::Tensor +table(::llvm::emitc::Tensor x, Tensor1D table) { + auto f = [&table](int8_t element) { + return table(static_cast(element) + 128); + }; + return unary<::llvm::emitc::Tensor>(x, f); +} +template +inline ::llvm::emitc::Tensor +table(::llvm::emitc::Tensor x, + Tensor1D table) { + auto f = [&table](int16_t element) { + int32_t integer = (element >> 7) + 0x100; + int32_t fractional = element & 0x7F; + int32_t result_integer = table(integer); + int32_t result_fractional = + (table(integer + 1) - table(integer)) * fractional; + return (result_integer << 7) + result_fractional; + }; + return unary<::llvm::emitc::Tensor>(x, f); +} +template +inline Dest select(SrcPred a, SrcOperand b, SrcOperand c) { + using ET_Src_Pred = typename get_element_type::type; + static_assert(std::is_same::value, + "Pred tensor type must be bool"); + using ET_Src_Operand = typename get_element_type::type; + auto f = [](ET_Src_Pred pred, ET_Src_Operand on_true, + 
ET_Src_Operand on_false) { return pred ? on_true : on_false; }; + return ternary(a, b, c, f); +} +template +inline Dest concat(Src... inputs) { + return ::emitc::concatenate(inputs...); +} +template +Dest conv2d(Src input, Weights weights, Tensor1D padding, + Tensor1D stride, Tensor1D dilation) { + static_assert(is_tensor_of_dim<4, Src>::value, + "Expected 4 dimensional input"); + static_assert(is_tensor_of_dim<4, Dest>::value, + "Expected 4 dimensional output"); + static_assert(is_tensor_of_dim<4, Weights>::value, + "Expected 4 dimensional weights"); + assert(stride[0] > 0); + assert(stride[1] > 0); + assert(dilation[0] == 1); + assert(dilation[1] == 1); + const int N = input.dim(0); + const int H_IN = input.dim(1); + const int W_IN = input.dim(2); + const int C_IN = input.dim(3); + Dest output; + const int C_OUT = output.dim(3); + const int K_H = weights.dim(1); + const int K_W = weights.dim(2); + const int S_H = stride[0]; + const int S_W = stride[1]; + const int pt = padding[0]; + const int pb = padding[1]; + const int pl = padding[2]; + const int pr = padding[3]; + const int H_PAD = pt + H_IN + pb; + const int W_PAD = pl + W_IN + pr; + for (int n = 0; n < N; n++) { + for (int h_pad = 0; h_pad < H_PAD - K_H + 1; h_pad += S_H) { + for (int w_pad = 0; w_pad < W_PAD - K_W + 1; w_pad += S_W) { + for (int kh = 0; kh < K_H; kh++) { + for (int kw = 0; kw < K_W; kw++) { + for (int c_in = 0; c_in < C_IN; c_in++) { + for (int c_out = 0; c_out < C_OUT; c_out++) { + const int h_out = h_pad / S_H; + const int w_out = w_pad / S_W; + const int h_in = h_pad - pt + kh; + const int w_in = w_pad - pl + kw; + if (h_in < 0 || h_in >= H_IN || w_in < 0 || w_in >= W_IN) + continue; + output(n, h_out, w_out, c_out) += + input(n, h_in, w_in, c_in) * weights(c_out, kh, kw, c_in); + } + } + } + } + } + } + } + return output; +} +template +Dest depthwise_conv2d(Src input, Weights weights, Tensor1D padding, + Tensor1D stride, + Tensor1D dilation) { + static_assert(is_tensor_of_dim<4, Src>::value, + "Expected 4 dimensional input"); + static_assert(is_tensor_of_dim<4, Dest>::value, + "Expected 4 dimensional output"); + static_assert(is_tensor_of_dim<4, Weights>::value, + "Expected 4 dimensional weights"); + static_assert(Src::dim(3) == Weights::dim(2), + "Input channels must equal weights channels"); + static_assert(Src::dim(0) == Dest::dim(0), "Batch sizes must be equal"); + static_assert(Dest::dim(3) % Src::dim(3) == 0, + "Output channels need to be a multiple of input channels"); + static_assert( + Dest::dim(3) == Src::dim(3) * Weights::dim(3), + "Output channels size must be input channels times channel multiplier"); + assert(stride[0] > 0); + assert(stride[1] > 0); + assert(dilation[0] == 1); + assert(dilation[1] == 1); + const int N = input.dim(0); + const int H_IN = input.dim(1); + const int W_IN = input.dim(2); + const int C_IN = input.dim(3); + Dest output; + const int K_H = weights.dim(0); + const int K_W = weights.dim(1); + const int M = weights.dim(3); + const int S_H = stride[0]; + const int S_W = stride[1]; + const int pt = padding[0]; + const int pb = padding[1]; + const int pl = padding[2]; + const int pr = padding[3]; + const int H_PAD = pt + H_IN + pb; + const int W_PAD = pl + W_IN + pr; + for (int n = 0; n < N; ++n) { + for (int h_pad = 0; h_pad < H_PAD - K_H + 1; h_pad += S_H) { + for (int w_pad = 0; w_pad < W_PAD - K_W + 1; w_pad += S_W) { + for (int kh = 0; kh < K_H; ++kh) { + for (int kw = 0; kw < K_W; ++kw) { + for (int c_in = 0; c_in < C_IN; ++c_in) { + for (int m = 0; m < M; ++m) { + const 
int h_out = h_pad / S_H; + const int w_out = w_pad / S_W; + const int c_out = c_in * M + m; + const int h_in = h_pad - pt + kh; + const int w_in = w_pad - pl + kw; + if (h_in < 0 || h_in >= H_IN || w_in < 0 || w_in >= W_IN) + continue; + const size_t weights_index = ::emitc::utility::ravel_index< + Weights::dim(0), Weights::dim(1), 1, + Weights::dim(2) * Weights::dim(3)>(kh, kw, 0, c_out); + output(n, h_out, w_out, c_out) += + input(n, h_in, w_in, c_in) * weights[weights_index]; + } + } + } + } + } + } + } + return output; +} +template +Dest fully_connected(Src input, Weights weights, Bias bias) { + static_assert(is_tensor_of_dim<2, Src>::value, + "Expected 2 dimensional input"); + static_assert(is_tensor_of_dim<2, Dest>::value, + "Expected 2 dimensional output"); + static_assert(is_tensor_of_dim<2, Weights>::value, + "Expected 2 dimensional weights"); + static_assert(is_tensor_of_dim<1, Bias>::value, + "Expected 1 dimensional bias"); + Dest output; + static_assert(input.dim(0) == output.dim(0), + "Output and input batch dimension do not match."); + static_assert(input.dim(1) == weights.dim(1), + "Input and weights dimensions do not match."); + static_assert(output.dim(1) == weights.dim(0), + "Output and weights dimensions do not match."); + static_assert(weights.dim(0) == bias.dim(0), + "Bias and weights dimensions do not match."); + const size_t N = input.dim(0); + const size_t C_IN = input.dim(1); + const size_t C_OUT = weights.dim(0); + for (size_t n = 0; n < N; ++n) { + for (size_t c_out = 0; c_out < C_OUT; ++c_out) { + for (size_t c_in = 0; c_in < C_IN; ++c_in) { + auto in = input(n, c_in); + auto weight = weights(c_out, c_in); + output(n, c_out) += in * weight; + } + output(n, c_out) += bias(c_out); + } + } + return output; +} +template = true, IsTensorOfDim<3, Src> = true, + IsTensorOfDim<2, Idx> = true, IsTensorOfType = true> +Dest gather(Src input, Idx indices) { + Dest result; + static_assert(input.dim(0) == result.dim(0), + "Input and output batch dimension do not match."); + static_assert(input.dim(0) == indices.dim(0), + "Input and weight batch dimension do not match."); + static_assert(input.dim(2) == result.dim(2), + "Input and output channel dimension do not match."); + static_assert(indices.dim(1) == result.dim(1), + "Weight and output index dimension do not match."); + auto it = result.begin(); + size_t d0offset = Src::dim(1) * Src::dim(2); + for (size_t i = 0, idx = Idx::size(); i < idx; i++) { + auto d0 = d0offset * (i / Idx::dim(1)); + auto d1 = Src::dim(2) * indices[i]; + auto start = input.begin() + d0 + d1; + auto end = start + Src::dim(2); + it = std::copy(start, end, it); + } + return result; +} +template +Tensor3D matmul(Tensor3D a, Tensor3D b) { + return ::emitc::batch_matmul>(a, b); +} +namespace { +template +inline Dest reduce(Src operand, typename get_element_type::type initValue, + int64_t dimension, Computation computation) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, "Element type mismatch"); + static_assert(Src::rank() == Dest::rank() + 1, + "source rank must equal dest rank + 1"); + std::vector retainedDimensions(Src::rank()); + std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0); + retainedDimensions.erase(retainedDimensions.begin() + dimension); + assert(retainedDimensions.size() == Dest::rank()); + Dest result; + 
std::fill(result.begin(), result.end(), initValue); + for (size_t i = 0; i < operand.size(); ++i) { + auto value = operand[i]; + auto index = operand.unravel_index(i); + std::array reducedIndex; + size_t j = 0; + for (size_t dim : retainedDimensions) { + reducedIndex[j++] = index[dim]; + } + auto reductionValue = result[result.ravel_index(reducedIndex)]; + result[result.ravel_index(reducedIndex)] = + computation(reductionValue, value); + } + return result; +} +} // namespace +template +inline Dest argmax(Src operand, int64_t dimension) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + using ET_Src = typename get_element_type::type; + static_assert(Src::rank() == Dest::rank() + 1, + "source rank must equal dest rank + 1"); + std::vector retainedDimensions(Src::rank()); + std::iota(retainedDimensions.begin(), retainedDimensions.end(), 0); + retainedDimensions.erase(retainedDimensions.begin() + dimension); + assert(retainedDimensions.size() == Dest::rank()); + Dest result; + typename replace_element_type::type maxValues; + std::fill(maxValues.begin(), maxValues.end(), + std::numeric_limits::min()); + for (size_t i = 0; i < operand.size(); ++i) { + auto value = operand[i]; + auto index = operand.unravel_index(i); + std::array reducedIndex; + size_t j = 0; + for (size_t dim : retainedDimensions) { + reducedIndex[j++] = index[dim]; + } + auto destIndex = result.ravel_index(reducedIndex); + if (value > maxValues[destIndex]) { + maxValues[destIndex] = value; + result[destIndex] = index[dimension]; + } + } + return result; +} +template +inline Dest reduce_all(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, + "Src tensor type must be bool"); + static_assert(std::is_same::value, + "Dest tensor type must be bool"); + auto and_ = [](ET_Src a, ET_Src b) { return (a && b); }; + return tosa::reduce(input, true, dimension, and_); +} +template +inline Dest reduce_any(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + using ET_Dest = typename get_element_type::type; + static_assert(std::is_same::value, + "Src tensor type must be bool"); + static_assert(std::is_same::value, + "Dest tensor type must be bool"); + auto or_ = [](ET_Src a, ET_Src b) { return a || b; }; + return tosa::reduce(input, false, dimension, or_); +} +template +inline Dest reduce_max(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::max); + return tosa::reduce(input, std::numeric_limits::min(), + dimension, f); +} +template +inline Dest reduce_min(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + auto f = + static_cast(std::min); + return tosa::reduce(input, std::numeric_limits::max(), + dimension, f); +} +template +inline Dest reduce_prod(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + return tosa::reduce(input, 1, dimension, + std::multiplies{}); +} +template +inline Dest reduce_sum(Src input, int64_t dimension) { + using ET_Src = typename get_element_type::type; + return tosa::reduce(input, 0, dimension, std::plus{}); +} +template inline Dest reshape(Src x) { + return ::emitc::reshape(x); +} +template +Dest slice(Src x, ::llvm::emitc::Tensor start_indices, + ::llvm::emitc::Tensor slice_sizes) { + ::llvm::emitc::Tensor limit_indices = + ::emitc::add(start_indices, slice_sizes); + 
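+  // The underlying ::emitc::slice takes (start, limit, stride) rather than
+  // TOSA's (start, size): the limits were computed above as start + size,
+  // and unit strides are splatted next.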
::llvm::emitc::Tensor strides = + ::emitc::tensor::splat<::llvm::emitc::Tensor>(1); + return ::emitc::slice(x, start_indices, limit_indices, strides); +} +template +inline Dest pad(Src operand, Padding padding, + Tensor0D::type> pad_const = + Tensor0D::type>{0}) { + using ET_Padding = typename get_element_type::type; + static_assert(is_tensor::value, "Expected tensor result"); + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(Padding::rank() == 2, "Padding must have rank 2"); + static_assert(Padding::dim(0) == Src::rank(), + "Dimension 1 of padding must equal source rank"); + static_assert(Padding::dim(1) == 2, "Dimension 2 of padding is must be 2"); + static_assert(std::is_same::value || + std::is_same::value, + "Padding element type must be i32 or i64"); + ::llvm::emitc::Tensor edge_padding_low; + ::llvm::emitc::Tensor edge_padding_high; + for (unsigned int i = 0; i < padding.dim(0); ++i) { + edge_padding_low(i) = padding(i, 0); + edge_padding_high(i) = padding(i, 1); + } + ::llvm::emitc::Tensor interior_padding; + std::fill(interior_padding.begin(), interior_padding.end(), 0); + return ::emitc::pad(operand, pad_const, edge_padding_low, + edge_padding_high, interior_padding); +} +template = true> +Dest tile(Src input, Tensor1D multiples) { + Dest result; + auto it = result.begin(); + for (int32_t i = 0, M0 = multiples[0]; i < M0; i++) { + it = std::copy(input.begin(), input.end(), it); + } + return result; +} +template = true> +Dest tile(Src input, Tensor1D multiples) { + Dest result; + auto it = result.begin(); + for (int32_t i = 0, M0 = multiples[0]; i < M0; i++) { + for (int32_t j = 0, D0 = Src::dim(0); j < D0; j++) { + for (int32_t k = 0, M1 = multiples[1]; k < M1; k++) { + auto start = input.begin() + j * Src::dim(1); + auto end = start + Src::dim(1); + it = std::copy(start, end, it); + } + } + } + return result; +} +template = true> +Dest tile(Src input, Tensor1D multiples) { + Dest result; + auto it = result.begin(); + for (int32_t m0 = 0, M0 = multiples[0]; m0 < M0; m0++) { + for (int32_t d0 = 0, D0 = Src::dim(0); d0 < D0; d0++) { + for (int32_t m1 = 0, M1 = multiples[1]; m1 < M1; m1++) { + for (int32_t d1 = 0, D1 = Src::dim(1); d1 < D1; d1++) { + for (int32_t m2 = 0, M2 = multiples[2]; m2 < M2; m2++) { + auto start = input.begin() + (d0 * Src::dim(1) + d1) * Src::dim(2); + auto end = start + Src::dim(2); + it = std::copy(start, end, it); + } + } + } + } + } + return result; +} +template = true> +Dest tile(Src input, Tensor1D multiples) { + Dest result; + auto it = result.begin(); + for (int32_t m0 = 0, M0 = multiples[0]; m0 < M0; m0++) { + for (int32_t d0 = 0, D0 = Src::dim(0); d0 < D0; d0++) { + for (int32_t m1 = 0, M1 = multiples[1]; m1 < M1; m1++) { + for (int32_t d1 = 0, D1 = Src::dim(1); d1 < D1; d1++) { + for (int32_t m2 = 0, M2 = multiples[2]; m2 < M2; m2++) { + for (int32_t d2 = 0, D2 = Src::dim(2); d2 < D2; d2++) { + for (int32_t m3 = 0, M3 = multiples[3]; m3 < M3; m3++) { + auto start = + input.begin() + + ((d0 * Src::dim(1) + d1) * Src::dim(2) + d2) * Src::dim(3); + auto end = start + Src::dim(3); + it = std::copy(start, end, it); + } + } + } + } + } + } + } + return result; +} +template +inline Dest transpose(Src operand, Tensor1D perms) { + static_assert(is_tensor::value, "Expected tensor argument"); + static_assert(is_tensor::value, "Expected tensor result"); + Tensor1D broadcast_dimensions; + for (size_t i = 0; i < perms.size(); ++i) { + auto pos = std::find(perms.begin(), 
perms.end(), i); + assert(pos != std::end(perms)); + int64_t index = std::distance(perms.begin(), pos); + broadcast_dimensions[i] = index; + } + return ::emitc::broadcast_in_dim(operand, broadcast_dimensions); +} +template +inline Dest transpose(Src input, Tensor1D perms) { + Tensor1D permsInt64; + for (size_t i = 0; i < perms.size(); ++i) { + permsInt64[i] = static_cast(perms[i]); + } + return tosa::transpose(input, permsInt64); +} +} // namespace tosa +} // namespace emitc +} // namespace +namespace llvm::emitc::generated { +void InlineOzTestModel::set_callsite_cost(::llvm::emitc::Tensor x) { + v1 = std::move(x); +} +void InlineOzTestModel::set_is_multiple_blocks( + ::llvm::emitc::Tensor x) { + v2 = std::move(x); +} +void InlineOzTestModel::set_caller_conditionally_executed_blocks( + ::llvm::emitc::Tensor x) { + v3 = std::move(x); +} +void InlineOzTestModel::set_inlining_default( + ::llvm::emitc::Tensor x) { + v4 = std::move(x); +} +void InlineOzTestModel::set_cold_cc_penalty( + ::llvm::emitc::Tensor x) { + v5 = std::move(x); +} +void InlineOzTestModel::set_callee_conditionally_executed_blocks( + ::llvm::emitc::Tensor x) { + v6 = std::move(x); +} +void InlineOzTestModel::set_callee_users(::llvm::emitc::Tensor x) { + v7 = std::move(x); +} +void InlineOzTestModel::set_callee_basic_block_count( + ::llvm::emitc::Tensor x) { + v8 = std::move(x); +} +void InlineOzTestModel::set_nr_ctant_params( + ::llvm::emitc::Tensor x) { + v9 = std::move(x); +} +void InlineOzTestModel::set_load_relative_intrinsic( + ::llvm::emitc::Tensor x) { + v10 = std::move(x); +} +void InlineOzTestModel::set_jump_table_penalty( + ::llvm::emitc::Tensor x) { + v11 = std::move(x); +} +void InlineOzTestModel::set_unsimplified_common_instructions( + ::llvm::emitc::Tensor x) { + v12 = std::move(x); +} +void InlineOzTestModel::set_indirect_call_penalty( + ::llvm::emitc::Tensor x) { + v13 = std::move(x); +} +void InlineOzTestModel::set_load_elimination( + ::llvm::emitc::Tensor x) { + v14 = std::move(x); +} +void InlineOzTestModel::set_call_penalty(::llvm::emitc::Tensor x) { + v15 = std::move(x); +} +void InlineOzTestModel::set_cost_estimate(::llvm::emitc::Tensor x) { + v16 = std::move(x); +} +void InlineOzTestModel::set_case_cluster_penalty( + ::llvm::emitc::Tensor x) { + v17 = std::move(x); +} +void InlineOzTestModel::set_node_count(::llvm::emitc::Tensor x) { + v18 = std::move(x); +} +void InlineOzTestModel::set_call_argument_setup( + ::llvm::emitc::Tensor x) { + v19 = std::move(x); +} +void InlineOzTestModel::set_sroa_savings(::llvm::emitc::Tensor x) { + v20 = std::move(x); +} +void InlineOzTestModel::set_lowered_call_arg_setup( + ::llvm::emitc::Tensor x) { + v21 = std::move(x); +} +void InlineOzTestModel::set_threshold(::llvm::emitc::Tensor x) { + v22 = std::move(x); +} +void InlineOzTestModel::set_dead_blocks(::llvm::emitc::Tensor x) { + v23 = std::move(x); +} +void InlineOzTestModel::set_constant_args(::llvm::emitc::Tensor x) { + v24 = std::move(x); +} +void InlineOzTestModel::set_sroa_losses(::llvm::emitc::Tensor x) { + v25 = std::move(x); +} +void InlineOzTestModel::set_simplified_instructions( + ::llvm::emitc::Tensor x) { + v26 = std::move(x); +} +void InlineOzTestModel::set_num_loops(::llvm::emitc::Tensor x) { + v27 = std::move(x); +} +void InlineOzTestModel::set_step_type(::llvm::emitc::Tensor x) { + v28 = std::move(x); +} +void InlineOzTestModel::set_edge_count(::llvm::emitc::Tensor x) { + v29 = std::move(x); +} +void InlineOzTestModel::set_nested_inlines( + ::llvm::emitc::Tensor x) { + v30 = std::move(x); +} +void 
InlineOzTestModel::set_caller_basic_block_count(
+    ::llvm::emitc::Tensor x) {
+  v31 = std::move(x);
+}
+void InlineOzTestModel::set_last_call_to_static_bonus(
+    ::llvm::emitc::Tensor x) {
+  v32 = std::move(x);
+}
+void InlineOzTestModel::set_nested_inline_cost_estimate(
+    ::llvm::emitc::Tensor x) {
+  v33 = std::move(x);
+}
+void InlineOzTestModel::set_callsite_height(
+    ::llvm::emitc::Tensor x) {
+  v34 = std::move(x);
+}
+void InlineOzTestModel::set_constant_offset_ptr_args(
+    ::llvm::emitc::Tensor x) {
+  v35 = std::move(x);
+}
+void InlineOzTestModel::set_switch_penalty(
+    ::llvm::emitc::Tensor x) {
+  v36 = std::move(x);
+}
+void InlineOzTestModel::set_discount(::llvm::emitc::Tensor x) {
+  v37 = std::move(x);
+}
+void InlineOzTestModel::set_caller_users(::llvm::emitc::Tensor x) {
+  v38 = std::move(x);
+}
+void InlineOzTestModel::set_reward(::llvm::emitc::Tensor x) {
+  v39 = std::move(x);
+}
+::llvm::emitc::Tensor InlineOzTestModel::run() {
+  ::llvm::emitc::Tensor v40 = {1};
+  return v40;
+}
+
+} // namespace llvm::emitc::generated
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -141,7 +141,9 @@
         clEnumValN(InliningAdvisorMode::Development, "development",
                    "Use development mode (runtime-loadable model)"),
         clEnumValN(InliningAdvisorMode::Release, "release",
-                   "Use release mode (AOT-compiled model)")));
+                   "Use release mode (AOT-compiled model)"),
+        clEnumValN(InliningAdvisorMode::EmitC, "emitc",
+                   "Use EmitC-compiled model")));
 
 static cl::opt EnableSyntheticCounts(
     "enable-npm-synthetic-counts", cl::Hidden,
diff --git a/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/ml-test-emitc-mode.ll
@@ -0,0 +1,6 @@
+; This test uses Inputs/test-module.ll, which it shares with the corresponding
+; tests for the 'development' and 'release' modes. The InlineOzTestModel
+; inlines everything.
+;
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=emitc -inliner-emitc-model-name=InlineOzTestModel -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK
+; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT
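For review context, a minimal sketch of how a consumer would drive the generated model once the emitc advisor has selected it by name (as the first RUN line above does via -inliner-emitc-model-name=InlineOzTestModel): each feature is pushed through its setter as a single-element tensor, and run() produces the advice. This is illustrative only and not part of the patch; the include path is a placeholder, and the int64_t element type of the feature tensors is an assumption, since the generated header is not reproduced here.

// Illustrative sketch, not part of the patch. Assumes the generated header
// declares llvm::emitc::generated::InlineOzTestModel with the setters and
// run() shown above, and that the feature tensors hold int64_t scalars.
#include "InlineOzTestModel.h" // placeholder path for the generated header

#include <cstdint>

using llvm::emitc::generated::InlineOzTestModel;

static bool getInlineAdvice(InlineOzTestModel &Model, int64_t CallsiteCost,
                            int64_t NodeCount) {
  // Each feature is a single-element tensor; the initializer-list
  // constructor wraps the scalar value.
  Model.set_callsite_cost({CallsiteCost});
  Model.set_node_count({NodeCount});
  // run() returns a single-element tensor; nonzero means "inline".
  return Model.run()[0] != 0;
}

Since InlineOzTestModel::run() always returns {1}, an advisor wired up this way inlines everything, which is exactly the behaviour the shared CHECK lines in Inputs/test-module.ll expect.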