From b03dc6faa2ed3c025765380ca19bf2178833afde Mon Sep 17 00:00:00 2001
From: Pranav Sharma
Date: Mon, 22 Jul 2019 11:09:12 -0700
Subject: [PATCH 1/5] Mention OrtCreateSessionFromArray in C API doc

---
 docs/C_API.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/C_API.md b/docs/C_API.md
index e13ddecfb095e..ea99c5875fe45 100644
--- a/docs/C_API.md
+++ b/docs/C_API.md
@@ -11,6 +11,7 @@
 * Setting the thread pool size for each session.
 * Setting graph optimization level for each session.
 * Dynamically loading custom ops. [Instructions](/docs/AddingCustomOp.md)
+* Ability to load a model from a byte array. See ```OrtCreateSessionFromArray``` in [onnxruntime_c_api.h](/include/onnxruntime/core/session/onnxruntime_c_api.h).
 
 ## Usage Overview
 
From f5f2e0c588bf956652fe9b23dc12b593283e38c8 Mon Sep 17 00:00:00 2001
From: Pranav Sharma
Date: Thu, 8 Aug 2019 23:52:57 -0700
Subject: [PATCH 2/5] Fix perf test executable due to removal of certain C APIs

---
 cmake/onnxruntime_unittests.cmake             |   4 +-
 onnxruntime/test/perftest/TestCase.cc         | 636 ++++++++++++++++++
 onnxruntime/test/perftest/TestCase.h          |  65 ++
 onnxruntime/test/perftest/heap_buffer.cc      |  10 +
 onnxruntime/test/perftest/heap_buffer.h       |  26 +
 onnxruntime/test/perftest/mem_buffer.h        |  21 +
 onnxruntime/test/perftest/tensorprotoutils.cc | 428 ++++++++++++
 onnxruntime/test/perftest/tensorprotoutils.h  |  36 +
 8 files changed, 1224 insertions(+), 2 deletions(-)
 create mode 100644 onnxruntime/test/perftest/TestCase.cc
 create mode 100644 onnxruntime/test/perftest/TestCase.h
 create mode 100644 onnxruntime/test/perftest/heap_buffer.cc
 create mode 100644 onnxruntime/test/perftest/heap_buffer.h
 create mode 100644 onnxruntime/test/perftest/mem_buffer.h
 create mode 100644 onnxruntime/test/perftest/tensorprotoutils.cc
 create mode 100644 onnxruntime/test/perftest/tensorprotoutils.h

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 7bc7c305787fe..0a52b840cd1f2 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -565,8 +565,8 @@ endif()
 onnxruntime_add_include_to_target(onnxruntime_perf_test gsl)
 
 if (onnxruntime_BUILD_SHARED_LIB)
-  set(onnxruntime_perf_test_libs onnxruntime_test_utils onnx_test_runner_common onnxruntime_common
-      onnx_test_data_proto onnx_proto libprotobuf ${GETOPT_LIB_WIDE} onnxruntime onnxruntime_framework onnx
+  set(onnxruntime_perf_test_libs onnxruntime_test_utils onnxruntime_common
+      onnx_test_data_proto onnx_proto libprotobuf ${GETOPT_LIB_WIDE} onnxruntime onnx
       ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
   if(onnxruntime_USE_NSYNC)
     list(APPEND onnxruntime_perf_test_libs nsync_cpp)
diff --git a/onnxruntime/test/perftest/TestCase.cc b/onnxruntime/test/perftest/TestCase.cc
new file mode 100644
index 0000000000000..238530172fac5
--- /dev/null
+++ b/onnxruntime/test/perftest/TestCase.cc
@@ -0,0 +1,636 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
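For reference, a minimal usage sketch of the byte-array loading called out in
PATCH 1/5 above. It assumes the pre-1.0 free-function C API used elsewhere in
this series (OrtCreateSessionOptions, OrtCreateSessionFromArray,
ORT_THROW_ON_ERROR) and an OrtEnv created elsewhere; the function name,
parameter names, and cleanup order here are illustrative, not part of the
patch:

// Create an inference session from a model held in memory (e.g. read via
// fread() or embedded as a resource) instead of from a file path.
OrtSession* CreateSessionFromBytes(OrtEnv* env, const void* model_data, size_t model_data_len) {
  OrtSessionOptions* session_options = nullptr;
  ORT_THROW_ON_ERROR(OrtCreateSessionOptions(&session_options));
  OrtSession* session = nullptr;
  ORT_THROW_ON_ERROR(OrtCreateSessionFromArray(env, model_data, model_data_len, session_options, &session));
  OrtReleaseSessionOptions(session_options);  // the session keeps its own copy of the options
  return session;
}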
+
+// needs to be included first to get around onnxruntime\cmake\external\onnx\onnx/common/constants.h(14): error C2513: 'bool': no variable declared before '='
+#include "tensorprotoutils.h"
+
+#include "TestCase.h"
+#include <fstream>
+#include <memory>
+#include "core/common/logging/logging.h"
+#include "core/common/common.h"
+#include "core/platform/env.h"
+#include "core/platform/ort_mutex.h"
+#include "core/session/onnxruntime_cxx_api.h"
+#include "core/framework/path_lib.h"
+#include "core/framework/allocator.h"
+#include <sstream>
+#include <map>
+#include <regex>
+#include "OrtValueList.h"
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#else
+#pragma warning(push)
+#pragma warning(disable : 4018) /*'expression' : signed/unsigned mismatch */
+#pragma warning(disable : 4065) /*switch statement contains 'default' but no 'case' labels*/
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4505)
+#pragma warning(disable : 4146) /*unary minus operator applied to unsigned type, result still unsigned*/
+#pragma warning(disable : 4244) /*'conversion' conversion from 'type1' to 'type2', possible loss of data*/
+#pragma warning(disable : 4251) /*'identifier' : class 'type' needs to have dll-interface to be used by clients of class 'type2'*/
+#pragma warning(disable : 4267) /*'var' : conversion from 'size_t' to 'type', possible loss of data*/
+#pragma warning(disable : 4305) /*'identifier' : truncation from 'type1' to 'type2'*/
+#pragma warning(disable : 4307) /*'operator' : integral constant overflow*/
+#pragma warning(disable : 4309) /*'conversion' : truncation of constant value*/
+#pragma warning(disable : 4334) /*'operator' : result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?)*/
+#pragma warning(disable : 4355) /*'this' : used in base member initializer list*/
+#pragma warning(disable : 4506) /*no definition for inline function 'function'*/
+#pragma warning(disable : 4800) /*'type' : forcing value to bool 'true' or 'false' (performance warning)*/
+#pragma warning(disable : 4996) /*The compiler encountered a deprecated declaration.*/
+#endif
+#include <google/protobuf/util/delimited_message_util.h>
+#include "tml.pb.h"
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#else
+#pragma warning(pop)
+#endif
+
+using namespace onnxruntime;
+using namespace onnxruntime::common;
+using google::protobuf::RepeatedPtrField;
+
+using ORT_VALUE_HOLDER = std::unique_ptr<OrtValue, decltype(&OrtReleaseValue)>;
+
+const std::string TestModelInfo::unknown_version = "unknown version";
+
+namespace {
+template <typename T>
+ONNXTensorElementDataType NumericTypeToONNXType();
+template <>
+ONNXTensorElementDataType NumericTypeToONNXType<float>() {
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+}
+
+template <>
+ONNXTensorElementDataType NumericTypeToONNXType<double>() {
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
+}
+
+template <>
+ONNXTensorElementDataType NumericTypeToONNXType<int64_t>() {
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+}
+
+template <>
+ONNXTensorElementDataType NumericTypeToONNXType<std::string>() {
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING;
+}
+
+template <typename T>
+OrtValue* CreateTensorWithDataAsOrtValue(OrtAllocatorInfo* info, std::vector<T>& input) {
+  std::vector<int64_t> dims(1, input.size());
+  OrtValue* ret = nullptr;
+  ORT_THROW_ON_ERROR(::OrtCreateTensorWithDataAsOrtValue(info, input.data(), input.size() * sizeof(T), dims.data(),
+                                                         dims.size(), NumericTypeToONNXType<T>(), &ret));
+  return ret;
+}
+
+template <typename key_type, typename value_type>
+OrtValue* PbMapToOrtValue(const google::protobuf::Map<key_type, value_type>& map) {
+  OrtAllocatorInfo* info;
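+  // describes CPU-resident memory for the key/value tensors created below; released via rel_info on scope exit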
ORT_THROW_ON_ERROR(OrtCreateCpuAllocatorInfo(OrtDeviceAllocator, OrtMemTypeDefault, &info)); + std::unique_ptr rel_info(info, OrtReleaseAllocatorInfo); + const size_t ele_count = map.size(); + std::vector dims(1, ele_count); + std::vector keys(ele_count); + std::vector values(ele_count); + size_t i = 0; + for (auto& kvp : map) { + keys[i] = kvp.first; + values[i] = kvp.second; + ++i; + } + OrtValueArray map_in(2); + OrtValue* p = CreateTensorWithDataAsOrtValue(info, keys); + if (p == nullptr) ORT_THROW("Create keys tensor failed"); + map_in.Set(0, p); + + p = CreateTensorWithDataAsOrtValue(info, values); + if (p == nullptr) ORT_THROW("Create values tensor failed"); + map_in.Set(1, p); + + // create map ort value + OrtValue* map_ort = nullptr; + ORT_THROW_ON_ERROR(OrtCreateValue(map_in.Data(), map_in.Length(), ONNX_TYPE_MAP, &map_ort)); + return map_ort; +} + +template +void VectorProtoToOrtValue(const RepeatedPtrField& input, ORT_VALUE_HOLDER& output) { + OrtAllocatorInfo* info; + ORT_THROW_ON_ERROR(OrtCreateCpuAllocatorInfo(OrtDeviceAllocator, OrtMemTypeDefault, &info)); + std::unique_ptr rel_info(info, OrtReleaseAllocatorInfo); + OrtValueArray in(input.size()); + size_t j = 0; + for (const T& v : input) { + // create key tensor + const auto& map = v.v(); + size_t ele_count = map.size(); + using key_type = typename std::remove_reference::type::key_type; + using value_type = typename std::remove_reference::type::mapped_type; + std::vector dims(1, static_cast(ele_count)); + std::vector keys(ele_count); + std::vector values(ele_count); + size_t i = 0; + for (auto& kvp : map) { + keys[i] = kvp.first; + values[i] = kvp.second; + ++i; + } + OrtValueArray map_in(2); + OrtValue* p = CreateTensorWithDataAsOrtValue(info, keys); + if (p == nullptr) ORT_THROW("Create keys tensor failed"); + map_in.Set(0, p); + + p = CreateTensorWithDataAsOrtValue(info, values); + if (p == nullptr) ORT_THROW("Create values tensor failed"); + map_in.Set(1, p); + + // create map ort value + OrtValue* map_ort = nullptr; + ORT_THROW_ON_ERROR(OrtCreateValue(map_in.Data(), map_in.Length(), ONNX_TYPE_MAP, &map_ort)); + in.Set(j++, map_ort); + } + OrtValue* seq_ort = nullptr; + ORT_THROW_ON_ERROR(OrtCreateValue(in.Data(), in.Length(), ONNX_TYPE_SEQUENCE, &seq_ort)); + output.reset(seq_ort); +} + +template +static int ExtractFileNo(const std::basic_string& name) { + size_t p1 = name.rfind('.'); + size_t p2 = name.rfind('_', p1); + ++p2; + std::basic_string number_str = name.substr(p2, p1 - p2); + const CHAR_T* start = number_str.c_str(); + const CHAR_T* end = number_str.c_str(); + long ret = OrtStrtol(start, const_cast(&end)); + if (end == start) { + ORT_THROW("parse file name failed"); + } + return static_cast(ret); +} +using PATH_STRING_TYPE = std::basic_string; + +class OnnxModelInfo : public TestModelInfo { + private: + std::string node_name_; + std::string onnx_commit_tag_; + std::vector input_value_info_; + std::vector output_value_info_; + + template + static void RepeatedPtrFieldToVector(const ::google::protobuf::RepeatedPtrField& input_value_info, + std::vector& out) { + for (int i = 0; i != input_value_info.size(); ++i) { + out.push_back(input_value_info[i]); + } + } + const std::basic_string model_url_; + + public: + OnnxModelInfo(_In_ const PATH_CHAR_TYPE* model_url) : model_url_(model_url) { + // parse model + int model_fd; + auto st = Env::Default().FileOpenRd(model_url, model_fd); + if (!st.IsOK()) { + ORT_THROW(st.ErrorMessage()); + } + google::protobuf::io::FileInputStream f(model_fd); + 
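+    // SetCloseOnDelete(true) transfers ownership of model_fd to the stream, which closes it on destruction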
f.SetCloseOnDelete(true); + ONNX_NAMESPACE::ModelProto model_pb; + if (!model_pb.ParseFromZeroCopyStream(&f)) { + ORT_THROW("Failed to load model because protobuf parsing failed."); + } +#ifdef __GNUG__ + std::smatch match; + std::string url_string{model_url}; + const std::regex onnx_tag_regex("onnx[0-9a-z]{3}"); //e.g. onnx141, onnx150, onnxtip + if (std::regex_search(url_string, match, onnx_tag_regex)) { + onnx_commit_tag_ = match[0].str(); + } else { + onnx_commit_tag_ = TestModelInfo::unknown_version; + } +#endif + const ONNX_NAMESPACE::GraphProto& graph = model_pb.graph(); + if (graph.node().size() == 1) { + node_name_ = graph.node()[0].op_type(); + } + std::unordered_set initializer_names; + for (const auto& init : graph.initializer()) { + if (!init.has_name()) continue; + initializer_names.insert(init.name()); + } + for (const auto& p : graph.input()) { + if (!p.has_name()) ORT_THROW("input without name??"); + if (initializer_names.find(p.name()) == initializer_names.end()) input_value_info_.push_back(p); + } + RepeatedPtrFieldToVector(graph.output(), output_value_info_); + } + + const PATH_CHAR_TYPE* GetModelUrl() const override { return model_url_.c_str(); } + std::string GetModelVersion() const override { return onnx_commit_tag_; } + + const std::string& GetNodeName() const override { return node_name_; } + const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const override { + return &output_value_info_[i]; + } + int GetInputCount() const override { return static_cast(input_value_info_.size()); } + int GetOutputCount() const override { return static_cast(output_value_info_.size()); } + const std::string& GetInputName(size_t i) const override { return input_value_info_[i].name(); } + + const std::string& GetOutputName(size_t i) const override { return output_value_info_[i].name(); } +}; + +template +static void SortTensorFileNames(std::vector>& input_pb_files) { + if (input_pb_files.size() <= 1) return; + std::sort(input_pb_files.begin(), input_pb_files.end(), + [](const std::basic_string& left, const std::basic_string& right) -> bool { + std::basic_string leftname = GetLastComponent(left); + std::basic_string rightname = GetLastComponent(right); + int left1 = ExtractFileNo(leftname); + int right1 = ExtractFileNo(rightname); + return left1 < right1; + }); + for (size_t i = 0; i != input_pb_files.size(); ++i) { + int fileno = ExtractFileNo(GetLastComponent(input_pb_files[i])); + if (static_cast(fileno) != i) { + ORT_THROW("illegal input file name:", ToMBString(input_pb_files[i])); + } + } +} + +OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, HeapBuffer& b) { + size_t len = 0; + auto status = onnxruntime::perftest::GetSizeInBytesFromTensorProto<0>(t, &len); + if (!status.IsOK()) { + ORT_THROW(status.ToString()); + } + void* p = len == 0 ? 
nullptr : b.AllocMemory(len); + Ort::Value temp_value{nullptr}; + OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); + status = onnxruntime::perftest::TensorProtoToMLValue(t, onnxruntime::perftest::MemBuffer(p, len, cpu_allocator_info), + temp_value); + if (!status.IsOK()) { + ORT_THROW(status.ToString()); + } + return temp_value.release(); +} + +void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* modelinfo, + std::unordered_map& name_data_map, HeapBuffer& b, std::ostringstream& oss) { + google::protobuf::io::FileInputStream f(test_data_pb_fd); + f.SetCloseOnDelete(true); + google::protobuf::io::CodedInputStream coded_input(&f); + bool clean_eof = false; + int item_id = 1; + for (proto::TraditionalMLData data; + google::protobuf::util::ParseDelimitedFromCodedStream(&data, &coded_input, &clean_eof); + ++item_id, data.Clear()) { + try { + ORT_VALUE_HOLDER gvalue(nullptr, OrtReleaseValue); + switch (data.values_case()) { + case proto::TraditionalMLData::kVectorMapStringToFloat: + VectorProtoToOrtValue(data.vector_map_string_to_float().v(), gvalue); + break; + case proto::TraditionalMLData::kVectorMapInt64ToFloat: + VectorProtoToOrtValue(data.vector_map_int64_to_float().v(), gvalue); + break; + case proto::TraditionalMLData::kMapStringToString: + gvalue.reset(PbMapToOrtValue(data.map_string_to_string().v())); + break; + case proto::TraditionalMLData::kMapStringToInt64: + gvalue.reset(PbMapToOrtValue(data.map_string_to_int64().v())); + break; + case proto::TraditionalMLData::kMapStringToFloat: + gvalue.reset(PbMapToOrtValue(data.map_string_to_float().v())); + break; + case proto::TraditionalMLData::kMapStringToDouble: + gvalue.reset(PbMapToOrtValue(data.map_string_to_double().v())); + break; + case proto::TraditionalMLData::kMapInt64ToString: + gvalue.reset(PbMapToOrtValue(data.map_int64_to_string().v())); + break; + case proto::TraditionalMLData::kMapInt64ToInt64: + gvalue.reset(PbMapToOrtValue(data.map_int64_to_int64().v())); + break; + case proto::TraditionalMLData::kMapInt64ToFloat: + gvalue.reset(PbMapToOrtValue(data.map_int64_to_float().v())); + break; + case proto::TraditionalMLData::kMapInt64ToDouble: + gvalue.reset(PbMapToOrtValue(data.map_int64_to_double().v())); + break; + case proto::TraditionalMLData::kTensor: { + gvalue.reset(TensorToOrtValue(data.tensor(), b)); + } break; + default: + ORT_NOT_IMPLEMENTED("unknown data type inside TraditionalMLData"); + } + if (!data.debug_info().empty()) { + oss << ":" << data.debug_info(); + } + std::string value_name = data.name(); + if (value_name.empty()) { + const size_t c = name_data_map.size(); + value_name = is_input ? 
modelinfo->GetInputName(c) : modelinfo->GetOutputName(c); + } + + auto pv = name_data_map.insert(std::make_pair(value_name, gvalue.release())); + if (!pv.second) { + ORT_THROW("duplicated test data name"); + break; + } + } catch (onnxruntime::NotImplementedException& ex) { + std::ostringstream oss2; + oss2 << "load the " << item_id << "-th item failed," << ex.what(); + ORT_NOT_IMPLEMENTED(oss2.str()); + } catch (std::exception& ex) { + std::ostringstream oss2; + oss2 << "load the " << item_id << "-th item failed," << ex.what(); + ORT_THROW(oss2.str()); + } + } + if (!clean_eof) { + ORT_THROW("parse input file failed, has extra unparsed data"); + } +} + +} // namespace + +TestModelInfo* TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url) { + return new OnnxModelInfo(model_url); +} + +/** + * test_case_dir must have contents of: + * model.onnx + * ???/input_??.pb + * ???/output_??.pb + * ???/input_??.pb + * ???/output_??.pb + */ +class OnnxTestCase : public ITestCase { + private: + std::string test_case_name_; + std::vector debuginfo_strings; + onnxruntime::OrtMutex m_; + + std::vector> test_data_dirs_; + + std::string GetDatasetDebugInfoString(size_t dataset_id) override { + std::lock_guard l(m_); + if (dataset_id < debuginfo_strings.size()) { + return debuginfo_strings[dataset_id]; + } + // return empty string + return std::string(); + } + + void ConvertTestData(const std::vector& test_data_pbs, HeapBuffer& b, bool is_input, + std::unordered_map& out); + + std::once_flag model_parsed_; + std::once_flag config_parsed_; + double per_sample_tolerance_; + double relative_per_sample_tolerance_; + bool post_processing_; + TestModelInfo* model_info_; + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTestCase); + + public: + OnnxTestCase(const std::string& test_case_name, TestModelInfo* model, double default_per_sample_tolerance, + double default_relative_per_sample_tolerance); + ~OnnxTestCase() override { delete model_info_; } + Status GetPerSampleTolerance(double* value) override; + Status GetRelativePerSampleTolerance(double* value) override; + Status GetPostProcessing(bool* value) override; + + const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const override { + return model_info_->GetOutputInfoFromModel(i); + } + + size_t GetDataCount() const override { + return test_data_dirs_.size(); + } + const std::string& GetNodeName() const override { return model_info_->GetNodeName(); } + + const PATH_CHAR_TYPE* GetModelUrl() const override { return model_info_->GetModelUrl(); } + const std::string& GetTestCaseName() const override { + return test_case_name_; + } + std::string GetTestCaseVersion() const override { + return model_info_->GetModelVersion(); + } + void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map&, bool is_input) override; +}; + +ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, + double default_per_sample_tolerance, double default_relative_per_sample_tolerance) { + return new OnnxTestCase(test_case_name, model, default_per_sample_tolerance, default_relative_per_sample_tolerance); +} + +Status OnnxTestCase::GetPerSampleTolerance(double* value) { + *value = per_sample_tolerance_; + return Status::OK(); +} + +Status OnnxTestCase::GetRelativePerSampleTolerance(double* value) { + *value = relative_per_sample_tolerance_; + return Status::OK(); +} + +Status OnnxTestCase::GetPostProcessing(bool* value) { + *value = post_processing_; + return Status::OK(); +} + +static std::string trim_str(const std::string& s) { + 
std::string ltrim = std::regex_replace(s, std::regex("^\\s+"), std::string(""));
+  std::string result = std::regex_replace(ltrim, std::regex("\\s+$"), std::string(""));
+  return result;
+}
+
+static bool read_config_file(const std::basic_string<PATH_CHAR_TYPE>& path, std::map<std::string, std::string>& fc) {
+  std::ifstream infile(path);
+  if (!infile.good()) {
+    return false;
+  }
+
+  for (std::string line; std::getline(infile, line);) {
+    std::istringstream ss(line);
+    if (line.empty()) {
+      continue;
+    }
+    std::vector<std::string> tokens;
+    for (std::string token; std::getline(ss, token, ':');) {
+      std::string trimmed_token = trim_str(token);
+      if (trimmed_token.empty()) {
+        continue;
+      }
+      tokens.push_back(trimmed_token);
+    }
+    if (tokens.size() < 2) {
+      continue;  // skip lines that don't parse as key:value
+    }
+    fc[tokens[0]] = tokens[1];
+  }
+  return true;
+}
+
+//load tensors from disk
+template <typename PATH_STRING_TYPE>
+static void LoadTensors(const std::vector<PATH_STRING_TYPE>& pb_files,
+                        std::vector<ONNX_NAMESPACE::TensorProto>* input_pbs) {
+  for (size_t i = 0; i != pb_files.size(); ++i) {
+    int tensor_fd;
+    auto st = Env::Default().FileOpenRd(pb_files.at(i), tensor_fd);
+    if (!st.IsOK()) {
+      ORT_THROW("open file '", ToMBString(pb_files.at(i)), "' failed:", st.ErrorMessage());
+    }
+    google::protobuf::io::FileInputStream f(tensor_fd);
+    f.SetCloseOnDelete(true);
+    ONNX_NAMESPACE::TensorProto tensor;
+    if (!tensor.ParseFromZeroCopyStream(&f)) {
+      ORT_THROW("parse file '", ToMBString(pb_files.at(i)), "' failed");
+    }
+    input_pbs->emplace_back(tensor);
+  }
+}
+
+void OnnxTestCase::LoadTestData(size_t id, HeapBuffer& b, std::unordered_map<std::string, OrtValue*>& name_data_map,
+                                bool is_input) {
+  if (id >= test_data_dirs_.size()) {
+    ORT_THROW("index out of bound");
+  }
+
+  PATH_STRING_TYPE test_data_pb = ConcatPathComponent<PATH_CHAR_TYPE>(
+      test_data_dirs_[id], (is_input ? ORT_TSTR("inputs.pb") : ORT_TSTR("outputs.pb")));
+  int test_data_pb_fd;
+  auto st = Env::Default().FileOpenRd(test_data_pb, test_data_pb_fd);
+  if (st.IsOK()) {  //has an all-in-one input file
+    std::ostringstream oss;
+    {
+      std::lock_guard<OrtMutex> l(m_);
+      oss << debuginfo_strings[id];
+    }
+    try {
+      LoopDataFile(test_data_pb_fd, is_input, model_info_, name_data_map, b, oss);
+    } catch (std::exception& ex) {
+      std::ostringstream oss2;
+      oss2 << "parse data file \"" << ToMBString(test_data_pb) << "\" failed:" << ex.what();
+      ORT_THROW(oss2.str());
+    }
+    {
+      std::lock_guard<OrtMutex> l(m_);
+      debuginfo_strings[id] = oss.str();
+    }
+    return;
+  }
+
+  std::vector<PATH_STRING_TYPE> test_data_pb_files;
+  const PATH_STRING_TYPE& dir_path = test_data_dirs_[id];
+  LoopDir(dir_path,
+          [&test_data_pb_files, &dir_path, is_input](const PATH_CHAR_TYPE* filename, OrtFileType f_type) -> bool {
+            if (filename[0] == '.') return true;
+            if (f_type != OrtFileType::TYPE_REG) return true;
+            std::basic_string<PATH_CHAR_TYPE> filename_str = filename;
+            if (!HasExtensionOf(filename_str, ORT_TSTR("pb"))) return true;
+            const std::basic_string<PATH_CHAR_TYPE> file_prefix =
+                is_input ? ORT_TSTR("input_") : ORT_TSTR("output_");
+            if (!filename_str.compare(0, file_prefix.length(), file_prefix)) {
+              std::basic_string<PATH_CHAR_TYPE> p = ConcatPathComponent<PATH_CHAR_TYPE>(dir_path, filename_str);
+              test_data_pb_files.push_back(p);
+            }
+            return true;
+          });
+  SortTensorFileNames(test_data_pb_files);
+
+  std::vector<ONNX_NAMESPACE::TensorProto> test_data_pbs;
+  LoadTensors(test_data_pb_files, &test_data_pbs);
+  ConvertTestData(test_data_pbs, b, is_input, name_data_map);
+}
+
+void OnnxTestCase::ConvertTestData(const std::vector<ONNX_NAMESPACE::TensorProto>& test_data_pbs, HeapBuffer& b,
+                                   bool is_input, std::unordered_map<std::string, OrtValue*>& out) {
+  bool has_valid_names = true;
+  std::vector<std::string> var_names(test_data_pbs.size());
+  for (size_t input_index = 0; input_index != test_data_pbs.size(); ++input_index) {
+    std::string name = test_data_pbs[input_index].name();
+    if (name.empty()) {
+      has_valid_names = false;
+      break;
+    }
+    var_names[input_index] = name;
+  }
+  if (!has_valid_names) {
+    size_t count = static_cast<size_t>(is_input ? model_info_->GetInputCount() : model_info_->GetOutputCount());
+    if (count != test_data_pbs.size()) {
+      ORT_THROW("data count mismatch, expect ", count, ", got ", test_data_pbs.size());
+    }
+    for (size_t i = 0; i != count; ++i) {
+      var_names[i] = is_input ? model_info_->GetInputName(i) : model_info_->GetOutputName(i);
+    }
+  }
+  for (size_t input_index = 0; input_index != test_data_pbs.size(); ++input_index) {
+    std::string name = var_names[input_index];
+    const ONNX_NAMESPACE::TensorProto& input = test_data_pbs[input_index];
+    size_t len = 0;
+
+    auto status = onnxruntime::perftest::GetSizeInBytesFromTensorProto<0>(input, &len);
+    if (!status.IsOK()) {
+      ORT_THROW(status.ToString());
+    }
+    void* p = len == 0 ? nullptr : b.AllocMemory(len);
+    OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
+    Ort::Value v1{nullptr};
+    status = onnxruntime::perftest::TensorProtoToMLValue(input, onnxruntime::perftest::MemBuffer(p, len, cpu_allocator_info),
+                                                         v1);
+    if (!status.IsOK()) {
+      ORT_THROW(status.ToString());
+    }
+    out.insert(std::make_pair(name, v1.release()));
+  }
+}
+
+OnnxTestCase::OnnxTestCase(const std::string& test_case_name, _In_ TestModelInfo* model,
+                           double default_per_sample_tolerance, double default_relative_per_sample_tolerance)
+    : test_case_name_(test_case_name), model_info_(model) {
+  std::basic_string<PATH_CHAR_TYPE> test_case_dir = model_info_->GetDir();
+
+  // parse config
+  std::basic_string<PATH_CHAR_TYPE> config_path =
+      ConcatPathComponent<PATH_CHAR_TYPE>(test_case_dir, ORT_TSTR("config.txt"));
+  /* Note: protobuf-lite doesn't support reading protobuf files as text-format. Config.txt is exactly that.
+     That's the reason I have to parse the file in a different way to read the configs. Currently
+     this affects 2 tests - fp16_tiny_yolov2 and fp16_inception_v1. It's not clear why we have to use protobuf
+     to represent simple config files that have only key-value pairs.
+ */ + std::map fc; + per_sample_tolerance_ = default_per_sample_tolerance; + relative_per_sample_tolerance_ = default_relative_per_sample_tolerance; + post_processing_ = false; + if (read_config_file(config_path, fc)) { + if (fc.count("per_sample_tolerance") > 0) { + per_sample_tolerance_ = stod(fc["per_sample_tolerance"]); + } + if (fc.count("relative_per_sample_tolerance") > 0) { + relative_per_sample_tolerance_ = stod(fc["relative_per_sample_tolerance"]); + } + if (fc.count("post_processing") > 0) { + post_processing_ = fc["post_processing"] == "true"; + } + } + + LoopDir(test_case_dir, [&test_case_dir, this](const PATH_CHAR_TYPE* filename, OrtFileType f_type) -> bool { + if (filename[0] == '.') return true; + if (f_type == OrtFileType::TYPE_DIR) { + std::basic_string p = ConcatPathComponent(test_case_dir, filename); + test_data_dirs_.push_back(p); + debuginfo_strings.push_back(ToMBString(p)); + } + return true; + }); +} diff --git a/onnxruntime/test/perftest/TestCase.h b/onnxruntime/test/perftest/TestCase.h new file mode 100644 index 0000000000000..119f1673125ad --- /dev/null +++ b/onnxruntime/test/perftest/TestCase.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include "heap_buffer.h" + +namespace ONNX_NAMESPACE { +class ValueInfoProto; +} + +//One test case is for one model file +//One test case can contain multiple test data(input/output pairs) +class ITestCase { + public: + virtual void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& name_data_map, + bool is_input) = 0; + virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; + virtual const std::string& GetNodeName() const = 0; + virtual const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const = 0; + + virtual const std::string& GetTestCaseName() const = 0; + virtual std::string GetTestCaseVersion() const = 0; + //a string to help identify the dataset + virtual std::string GetDatasetDebugInfoString(size_t dataset_id) = 0; + //The number of input/output pairs + virtual size_t GetDataCount() const = 0; + virtual ~ITestCase() = default; + virtual ::onnxruntime::common::Status GetPerSampleTolerance(double* value) = 0; + virtual ::onnxruntime::common::Status GetRelativePerSampleTolerance(double* value) = 0; + virtual ::onnxruntime::common::Status GetPostProcessing(bool* value) = 0; +}; + +class TestModelInfo { + public: + virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; + virtual std::basic_string GetDir() const { + std::basic_string test_case_dir; + auto st = onnxruntime::GetDirNameFromFilePath(GetModelUrl(), test_case_dir); + if (!st.IsOK()) { + ORT_THROW("GetDirNameFromFilePath failed"); + } + return test_case_dir; + } + virtual const std::string& GetNodeName() const = 0; + virtual const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const = 0; + virtual int GetInputCount() const = 0; + virtual int GetOutputCount() const = 0; + virtual const std::string& GetInputName(size_t i) const = 0; + virtual const std::string& GetOutputName(size_t i) const = 0; + virtual std::string GetModelVersion() const { return ""; } + virtual ~TestModelInfo() = default; + + static TestModelInfo* LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url); + static const std::string unknown_version; +}; + +ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, + double default_per_sample_tolerance, double 
default_relative_per_sample_tolerance); diff --git a/onnxruntime/test/perftest/heap_buffer.cc b/onnxruntime/test/perftest/heap_buffer.cc new file mode 100644 index 0000000000000..5305f684eb392 --- /dev/null +++ b/onnxruntime/test/perftest/heap_buffer.cc @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "heap_buffer.h" + +HeapBuffer::~HeapBuffer() { + for (void* p : buffers_) { + free(p); + } +} \ No newline at end of file diff --git a/onnxruntime/test/perftest/heap_buffer.h b/onnxruntime/test/perftest/heap_buffer.h new file mode 100644 index 0000000000000..f888818ef1f4c --- /dev/null +++ b/onnxruntime/test/perftest/heap_buffer.h @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once +#include +#include + +/** + * A holder for delay freed buffers + */ +class HeapBuffer { + public: + HeapBuffer() = default; + /** + * free all the buffers allocated from 'AllocMemory' function + */ + ~HeapBuffer(); + void* AllocMemory(size_t size) { + void* p = malloc(size); + buffers_.push_back(p); + return p; + } + + private: + std::vector buffers_; +}; \ No newline at end of file diff --git a/onnxruntime/test/perftest/mem_buffer.h b/onnxruntime/test/perftest/mem_buffer.h new file mode 100644 index 0000000000000..d5b7a03de7a7f --- /dev/null +++ b/onnxruntime/test/perftest/mem_buffer.h @@ -0,0 +1,21 @@ +#pragma once +#include "core/common/common.h" + +namespace onnxruntime { +namespace perftest { +class MemBuffer { + public: + MemBuffer(void* buffer, size_t len, const OrtAllocatorInfo& alloc_info) + : buffer_(buffer), len_(len), alloc_info_(alloc_info) {} + void* GetBuffer() const { return buffer_; } + + size_t GetLen() const { return len_; } + const OrtAllocatorInfo& GetAllocInfo() const { return alloc_info_; } + + private: + void* const buffer_; + const size_t len_; + const OrtAllocatorInfo& alloc_info_; +}; +} // namespace perftest +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/perftest/tensorprotoutils.cc b/onnxruntime/test/perftest/tensorprotoutils.cc new file mode 100644 index 0000000000000..171e74d2d92e7 --- /dev/null +++ b/onnxruntime/test/perftest/tensorprotoutils.cc @@ -0,0 +1,428 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "tensorprotoutils.h" + +#include +#include +#include +#include +#include "core/framework/data_types.h" +#include "core/framework/allocator.h" +#include "onnx-ml.pb.h" +#include "core/session/onnxruntime_cxx_api.h" + +namespace onnxruntime { +namespace perftest { +#ifdef __GNUC__ +constexpr inline bool IsLittleEndianOrder() noexcept { return __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; } +#else +// On Windows and Mac, this function should always return true +GSL_SUPPRESS(type .1) // allow use of reinterpret_cast for this special case +inline bool IsLittleEndianOrder() noexcept { + static int n = 1; + return (*reinterpret_cast(&n) == 1); +} +#endif + +//From core common +inline void MakeStringInternal(std::ostringstream& /*ss*/) noexcept { +} + +template +inline void MakeStringInternal(std::ostringstream& ss, const T& t) noexcept { + ss << t; +} + +template +inline void MakeStringInternal(std::ostringstream& ss, const T& t, const Args&... args) noexcept { + ::onnxruntime::MakeStringInternal(ss, t); + ::onnxruntime::MakeStringInternal(ss, args...); +} + +template +std::string MakeString(const Args&... 
args) {
+  std::ostringstream ss;
+  ::onnxruntime::MakeStringInternal(ss, args...);
+  return std::string(ss.str());
+}
+
+// Specializations for already-a-string types.
+template <>
+inline std::string MakeString(const std::string& str) {
+  return str;
+}
+inline std::string MakeString(const char* p_str) {
+  return p_str;
+}
+
+std::vector<int64_t> GetTensorShapeFromTensorProto(const onnx::TensorProto& tensor_proto) {
+  const auto& dims = tensor_proto.dims();
+  std::vector<int64_t> tensor_shape_vec(static_cast<size_t>(dims.size()));
+  for (int i = 0; i < dims.size(); ++i) {
+    tensor_shape_vec[i] = dims[i];
+  }
+
+  return tensor_shape_vec;
+}
+
+// This function doesn't support string tensors
+template <typename T>
+static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size,
+                                    /*out*/ T* p_data) {
+  // allow this low level routine to be somewhat unsafe. assuming it's thoroughly tested and valid
+  GSL_SUPPRESS(type)       // type.1 reinterpret-cast; type.4 C-style casts; type.5 'T result;' is uninitialized;
+  GSL_SUPPRESS(bounds .1)  // pointer arithmetic
+  GSL_SUPPRESS(f .23)      // buff and temp_bytes never tested for nullness and could be gsl::not_null
+  {
+    size_t expected_size_in_bytes;
+    if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) {
+      throw Ort::Exception("size overflow", OrtErrorCode::ORT_FAIL);
+    }
+    if (raw_data_length != expected_size_in_bytes)
+      throw Ort::Exception(MakeString("UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
+                                      expected_size_in_bytes, ", got ", raw_data_length),
+                           OrtErrorCode::ORT_FAIL);
+    if (IsLittleEndianOrder()) {
+      memcpy(p_data, raw_data, raw_data_length);
+    } else {
+      // big-endian host: byte-swap each element while copying
+      const size_t type_size = sizeof(T);
+      const char* buff = reinterpret_cast<const char*>(raw_data);
+      for (size_t i = 0; i < raw_data_length; i += type_size, buff += type_size) {
+        T result;
+        char* temp_bytes = reinterpret_cast<char*>(&result);
+        for (size_t j = 0; j < type_size; ++j) {
+          memcpy(&temp_bytes[j], &buff[type_size - 1 - j], 1);
+        }
+        p_data[i / type_size] = result;
+      }
+    }
+  }
+}
+
+// This macro doesn't work for Float16/bool/string tensors
+#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                   \
+  template <>                                                                                   \
+  void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \
+                    /*out*/ T* p_data, int64_t expected_size) {                                 \
+    if (nullptr == p_data) {                                                                    \
+      const size_t size = raw_data != nullptr ? \
raw_data_len : tensor.field_size(); \ + if (size == 0) return; \ + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ + } \ + if (nullptr == p_data || Type != tensor.data_type()) { \ + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ + } \ + if (raw_data != nullptr) { \ + UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); \ + return; \ + } \ + if (tensor.field_size() != expected_size) \ + throw Ort::Exception(MakeString("corrupted protobuf data: tensor shape size(", expected_size, \ + ") does not match the data size(", tensor.field_size(), ") in proto"), \ + OrtErrorCode::ORT_FAIL); \ + auto& data = tensor.field_name(); \ + for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ + *p_data++ = *reinterpret_cast(data_iter); \ + return; \ + } + +// TODO: complex64 complex128 +DEFINE_UNPACK_TENSOR(float, onnx::TensorProto_DataType_FLOAT, float_data, float_data_size) +DEFINE_UNPACK_TENSOR(double, onnx::TensorProto_DataType_DOUBLE, double_data, double_data_size); +DEFINE_UNPACK_TENSOR(uint8_t, onnx::TensorProto_DataType_UINT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int8_t, onnx::TensorProto_DataType_INT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int16_t, onnx::TensorProto_DataType_INT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(uint16_t, onnx::TensorProto_DataType_UINT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int32_t, onnx::TensorProto_DataType_INT32, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int64_t, onnx::TensorProto_DataType_INT64, int64_data, int64_data_size) +DEFINE_UNPACK_TENSOR(uint64_t, onnx::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) +DEFINE_UNPACK_TENSOR(uint32_t, onnx::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) + +// doesn't support raw data +template <> +void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, + /*out*/ std::string* p_data, int64_t expected_size) { + if (nullptr == p_data) { + if (tensor.string_data_size() == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + if (onnx::TensorProto_DataType_STRING != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + + if (tensor.string_data_size() != expected_size) + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); + + auto& string_data = tensor.string_data(); + for (const auto& iter : string_data) { + *p_data++ = iter; + } + + return; +} +template <> +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ bool* p_data, int64_t expected_size) { + if (nullptr == p_data) { + const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); + if (size == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + if (onnx::TensorProto_DataType_BOOL != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + + if (raw_data != nullptr) { + return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); + } + + if (tensor.int32_data_size() != expected_size) + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); + for (int iter : tensor.int32_data()) { + *p_data++ = static_cast(iter); + } + + return; +} +template <> +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ MLFloat16* p_data, int64_t expected_size) { + if (nullptr == p_data) { + const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size(); + if (size == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + if (onnx::TensorProto_DataType_FLOAT16 != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + + if (raw_data != nullptr) { + return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); + } + + if (tensor.int32_data_size() != expected_size) + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); + + constexpr int max_value = std::numeric_limits::max(); + for (int i = 0; i < static_cast(expected_size); i++) { + int v = tensor.int32_data()[i]; + if (v < 0 || v > max_value) { + throw Ort::Exception( + "data overflow", OrtErrorCode::ORT_FAIL); + } + p_data[i] = MLFloat16(static_cast(v)); + } + + return; +} + +template <> +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ BFloat16* p_data, int64_t expected_size) { + if (nullptr == p_data) { + const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); + if (size == 0) + return; + + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + if (onnx::TensorProto_DataType_BFLOAT16 != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); + } + + if (raw_data != nullptr) { + return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); + } + + if (tensor.int32_data_size() != expected_size) + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); + + constexpr int max_value = std::numeric_limits::max(); + for (int i = 0; i < static_cast(expected_size); i++) { + int v = tensor.int32_data()[i]; + if (v < 0 || v > max_value) { + throw Ort::Exception( + "data overflow", OrtErrorCode::ORT_FAIL); + } + p_data[i] = BFloat16(static_cast(v)); + } + + return; +} + +#define CASE_PROTO_TRACE(X, Y) \ + case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ + if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ + throw Ort::Exception("Invalid TensorProto", OrtErrorCode::ORT_FAIL); \ + } \ + break; + +template +Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out) { + const auto& dims = tensor_proto.dims(); + size_t size = 1; + for (google::protobuf::int64 dim : dims) { + if (dim < 0 || static_cast(dim) >= std::numeric_limits::max()) { + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); + } + if (!IAllocator::CalcMemSizeForArray(size, static_cast(dim), &size)) { + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); + } + } + switch (tensor_proto.data_type()) { + CASE_PROTO_TRACE(FLOAT, float); + CASE_PROTO_TRACE(DOUBLE, double); + CASE_PROTO_TRACE(BOOL, bool); + CASE_PROTO_TRACE(INT8, int8_t); + CASE_PROTO_TRACE(INT16, int16_t); + CASE_PROTO_TRACE(INT32, int32_t); + CASE_PROTO_TRACE(INT64, int64_t); + CASE_PROTO_TRACE(UINT8, uint8_t); + CASE_PROTO_TRACE(UINT16, uint16_t); + CASE_PROTO_TRACE(UINT32, uint32_t); + CASE_PROTO_TRACE(UINT64, uint64_t); + CASE_PROTO_TRACE(FLOAT16, MLFloat16); + CASE_PROTO_TRACE(BFLOAT16, BFloat16); + CASE_PROTO_TRACE(STRING, std::string); + default: + return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED); + } + return Status::OK(); +} + +struct UnInitializeParam { + void* preallocated; + size_t preallocated_size; + ONNXTensorElementDataType ele_type; +}; + +void OrtInitializeBufferForTensor(void* input, size_t input_len, + ONNXTensorElementDataType type) { + try { + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; + size_t tensor_size = input_len / sizeof(std::string); + std::string* ptr = reinterpret_cast(input); + for (size_t i = 0, n = tensor_size; i < n; ++i) { + new (ptr + i) std::string(); + } + } catch (std::exception& ex) { + throw Ort::Exception(ex.what(), OrtErrorCode::ORT_RUNTIME_EXCEPTION); + } + return; +} + +#define CASE_PROTO(X, Y) \ + case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ + ::onnxruntime::perftest::UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, tensor_size); \ + break; + +#define CASE_TYPE(X) \ + case onnx::TensorProto_DataType_##X: \ + return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X; + +ONNXTensorElementDataType CApiElementTypeFromProtoType(int type) { + switch (type) { + CASE_TYPE(FLOAT) + CASE_TYPE(UINT8) + CASE_TYPE(INT8) + CASE_TYPE(UINT16) + CASE_TYPE(INT16) + CASE_TYPE(INT32) + CASE_TYPE(INT64) + CASE_TYPE(STRING) + CASE_TYPE(BOOL) + 
CASE_TYPE(FLOAT16) + CASE_TYPE(DOUBLE) + CASE_TYPE(UINT32) + CASE_TYPE(UINT64) + CASE_TYPE(COMPLEX64) + CASE_TYPE(COMPLEX128) + CASE_TYPE(BFLOAT16) + default: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; + } +} + +ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto) { + return CApiElementTypeFromProtoType(tensor_proto.data_type()); +} + +Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuffer& m, Ort::Value& value) { + const OrtAllocatorInfo& allocator = m.GetAllocInfo(); + ONNXTensorElementDataType ele_type = perftest::GetTensorElementType(tensor_proto); + const void* raw_data = nullptr; + size_t raw_data_len = 0; + void* tensor_data; + { + if (tensor_proto.data_location() == onnx::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) { + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Server doesn't support external data."); + } else if (tensor_proto.has_raw_data()) { + if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) + return Status(common::ONNXRUNTIME, common::FAIL, "String tensor cannot have raw data."); + raw_data = tensor_proto.raw_data().data(); + raw_data_len = tensor_proto.raw_data().size(); + } + { + void* preallocated = m.GetBuffer(); + size_t preallocated_size = m.GetLen(); + int64_t tensor_size = 1; + { + for (auto i : tensor_proto.dims()) { + if (i < 0) return Status(common::ONNXRUNTIME, common::FAIL, "Tensor can't contain negative dims"); + tensor_size *= i; + } + } + // tensor_size could be zero. see test_slice_start_out_of_bounds\test_data_set_0\output_0.pb + if (static_cast(tensor_size) > SIZE_MAX) { + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Size overflow"); + } + size_t size_to_allocate; + GetSizeInBytesFromTensorProto<0>(tensor_proto, &size_to_allocate); + + if (preallocated && preallocated_size < size_to_allocate) + return Status(common::ONNXRUNTIME, common::FAIL, MakeString("The buffer planner is not consistent with tensor buffer size, expected ", size_to_allocate, ", got ", preallocated_size)); + switch (tensor_proto.data_type()) { + CASE_PROTO(FLOAT, float); + CASE_PROTO(DOUBLE, double); + CASE_PROTO(BOOL, bool); + CASE_PROTO(INT8, int8_t); + CASE_PROTO(INT16, int16_t); + CASE_PROTO(INT32, int32_t); + CASE_PROTO(INT64, int64_t); + CASE_PROTO(UINT8, uint8_t); + CASE_PROTO(UINT16, uint16_t); + CASE_PROTO(UINT32, uint32_t); + CASE_PROTO(UINT64, uint64_t); + CASE_PROTO(FLOAT16, MLFloat16); + CASE_PROTO(BFLOAT16, BFloat16); + case onnx::TensorProto_DataType::TensorProto_DataType_STRING: + if (preallocated != nullptr) { + OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); + } + ::onnxruntime::perftest::UnpackTensor(tensor_proto, raw_data, raw_data_len, + (std::string*)preallocated, tensor_size); + break; + default: { + std::ostringstream ostr; + ostr << "Initialized tensor with unexpected type: " << tensor_proto.data_type(); + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str()); + } + } + tensor_data = preallocated; + } + } + std::vector tensor_shape_vec = GetTensorShapeFromTensorProto(tensor_proto); + // Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1). 
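+  // e.g. an initializer with no dims yields an empty shape vector, so CreateTensor
+  // below is called with shape_len == 0 and produces a single-element (scalar) value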
+ value = Ort::Value::CreateTensor(&allocator, tensor_data, m.GetLen(), tensor_shape_vec.data(), tensor_shape_vec.size(), (ONNXTensorElementDataType)tensor_proto.data_type()); + return Status::OK(); +} +template Status GetSizeInBytesFromTensorProto<256>(const onnx::TensorProto& tensor_proto, + size_t* out); +template Status GetSizeInBytesFromTensorProto<0>(const onnx::TensorProto& tensor_proto, size_t* out); +} // namespace perftest +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/perftest/tensorprotoutils.h b/onnxruntime/test/perftest/tensorprotoutils.h new file mode 100644 index 0000000000000..c9f954d0ebb9e --- /dev/null +++ b/onnxruntime/test/perftest/tensorprotoutils.h @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include +#include +#include "core/session/onnxruntime_c_api.h" +#include "core/session/onnxruntime_cxx_api.h" + +#include "mem_buffer.h" + +#include "onnx-ml.pb.h" + +namespace onnxruntime { +namespace perftest { +// How much memory it will need for putting the content of this tensor into a plain array +// complex64/complex128 tensors are not supported. +// The output value could be zero or -1. +template +common::Status GetSizeInBytesFromTensorProto(const onnx::TensorProto& tensor_proto, size_t* out); +/** + * deserialize a TensorProto into a preallocated memory buffer. + * Impl must correspond to onnxruntime/core/framework/tensorprotoutils.cc + * This implementation does not support external data so as to reduce dependency surface. + */ +common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuffer& m, /* out */ Ort::Value& value); + +template +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ T* p_data, int64_t expected_size); + +ONNXTensorElementDataType CApiElementTypeFromProtoType(int type); +ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto); +} // namespace perftest +} // namespace onnxruntime \ No newline at end of file From ec4bc52e2e1a9dbacc53c9a52d8f4ac7d051d7da Mon Sep 17 00:00:00 2001 From: Pranav Sharma Date: Fri, 9 Aug 2019 00:30:21 -0700 Subject: [PATCH 3/5] fix linux build --- onnxruntime/test/perftest/tensorprotoutils.cc | 2 +- onnxruntime/test/perftest/tensorprotoutils.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/perftest/tensorprotoutils.cc b/onnxruntime/test/perftest/tensorprotoutils.cc index 171e74d2d92e7..9ca9d53b5fd90 100644 --- a/onnxruntime/test/perftest/tensorprotoutils.cc +++ b/onnxruntime/test/perftest/tensorprotoutils.cc @@ -9,8 +9,8 @@ #include #include "core/framework/data_types.h" #include "core/framework/allocator.h" -#include "onnx-ml.pb.h" #include "core/session/onnxruntime_cxx_api.h" +#include "core/graph/onnx_protobuf.h" namespace onnxruntime { namespace perftest { diff --git a/onnxruntime/test/perftest/tensorprotoutils.h b/onnxruntime/test/perftest/tensorprotoutils.h index c9f954d0ebb9e..e4c18edaab3b0 100644 --- a/onnxruntime/test/perftest/tensorprotoutils.h +++ b/onnxruntime/test/perftest/tensorprotoutils.h @@ -10,7 +10,9 @@ #include "mem_buffer.h" -#include "onnx-ml.pb.h" +namespace onnx { +class TensorProto; +} namespace onnxruntime { namespace perftest { From f39ccd2706dd21ab01af9de1c79191f7fcac4dba Mon Sep 17 00:00:00 2001 From: Pranav Sharma Date: Fri, 9 Aug 2019 12:45:50 -0700 Subject: [PATCH 4/5] Avoid duplication --- cmake/onnxruntime_unittests.cmake | 
4 +- onnxruntime/test/onnx/TestCase.cc | 25 +- onnxruntime/test/onnx/heap_buffer.cc | 8 - onnxruntime/test/onnx/heap_buffer.h | 5 - .../test/{perftest => onnx}/mem_buffer.h | 10 +- onnxruntime/test/onnx/tensorprotoutils.cc | 505 +++++--------- onnxruntime/test/onnx/tensorprotoutils.h | 64 +- onnxruntime/test/perftest/TestCase.cc | 636 ------------------ onnxruntime/test/perftest/TestCase.h | 65 -- onnxruntime/test/perftest/heap_buffer.cc | 10 - onnxruntime/test/perftest/heap_buffer.h | 26 - onnxruntime/test/perftest/tensorprotoutils.cc | 428 ------------ onnxruntime/test/perftest/tensorprotoutils.h | 38 -- 13 files changed, 217 insertions(+), 1607 deletions(-) rename onnxruntime/test/{perftest => onnx}/mem_buffer.h (72%) delete mode 100644 onnxruntime/test/perftest/TestCase.cc delete mode 100644 onnxruntime/test/perftest/TestCase.h delete mode 100644 onnxruntime/test/perftest/heap_buffer.cc delete mode 100644 onnxruntime/test/perftest/heap_buffer.h delete mode 100644 onnxruntime/test/perftest/tensorprotoutils.cc delete mode 100644 onnxruntime/test/perftest/tensorprotoutils.h diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 0a52b840cd1f2..0b3fad15d28ba 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -565,8 +565,8 @@ endif() onnxruntime_add_include_to_target(onnxruntime_perf_test gsl) if (onnxruntime_BUILD_SHARED_LIB) - set(onnxruntime_perf_test_libs onnxruntime_test_utils onnxruntime_common - onnx_test_data_proto onnx_proto libprotobuf ${GETOPT_LIB_WIDE} onnxruntime onnx + set(onnxruntime_perf_test_libs onnxruntime_test_utils onnx_test_runner_common onnxruntime_common + onnx_test_data_proto onnx_proto libprotobuf ${GETOPT_LIB_WIDE} onnxruntime ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) if(onnxruntime_USE_NSYNC) list(APPEND onnxruntime_perf_test_libs nsync_cpp) diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 628ef479833d5..1fdb2cd9668c2 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -13,6 +13,7 @@ #include "core/platform/ort_mutex.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/framework/path_lib.h" +#include "core/framework/allocator.h" #include #include #include @@ -267,22 +268,18 @@ static void SortTensorFileNames(std::vector>& OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, HeapBuffer& b) { size_t len = 0; - auto status = onnxruntime::test::utils::GetSizeInBytesFromTensorProto<0>(t, &len); + auto status = onnxruntime::test::GetSizeInBytesFromTensorProto<0>(t, &len); if (!status.IsOK()) { ORT_THROW(status.ToString()); } void* p = len == 0 ? 
nullptr : b.AllocMemory(len); - auto d = std::make_unique(); - auto temp_value = std::make_unique(); + Ort::Value temp_value{nullptr}; OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); - status = onnxruntime::test::utils::TensorProtoToMLValue(Env::Default(), nullptr, t, - MemBuffer(p, len, cpu_allocator_info), *temp_value, *d); + status = onnxruntime::test::TensorProtoToMLValue(t, onnxruntime::test::MemBuffer(p, len, cpu_allocator_info), + temp_value); if (!status.IsOK()) { ORT_THROW(status.ToString()); } - if (d->f) { - b.AddDeleter(d.release()); - } return temp_value.release(); } @@ -582,22 +579,18 @@ void OnnxTestCase::ConvertTestData(const std::vector(input, &len); + auto status = onnxruntime::test::GetSizeInBytesFromTensorProto<0>(input, &len); if (!status.IsOK()) { ORT_THROW(status.ToString()); } void* p = len == 0 ? nullptr : b.AllocMemory(len); - auto d = std::make_unique(); + Ort::Value v1{nullptr}; OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); - auto v1 = std::make_unique(); - status = onnxruntime::test::utils::TensorProtoToMLValue(Env::Default(), nullptr, input, - MemBuffer(p, len, cpu_allocator_info), *v1, *d); + status = onnxruntime::test::TensorProtoToMLValue(input, onnxruntime::test::MemBuffer(p, len, cpu_allocator_info), + v1); if (!status.IsOK()) { ORT_THROW(status.ToString()); } - if (d->f) { - b.AddDeleter(d.release()); - } out.insert(std::make_pair(name, v1.release())); } } diff --git a/onnxruntime/test/onnx/heap_buffer.cc b/onnxruntime/test/onnx/heap_buffer.cc index 4d036d5033b71..e2b7d8dac826f 100644 --- a/onnxruntime/test/onnx/heap_buffer.cc +++ b/onnxruntime/test/onnx/heap_buffer.cc @@ -2,17 +2,9 @@ // Licensed under the MIT License. #include "heap_buffer.h" -#include "core/framework/callback.h" #include "core/session/onnxruntime_c_api.h" -void HeapBuffer::AddDeleter(onnxruntime::OrtCallback* d) { - if (d != nullptr) deleters_.push_back(d); -} - HeapBuffer::~HeapBuffer() { - for (auto d : deleters_) { - onnxruntime::OrtRunCallback(d); - } for (void* p : buffers_) { free(p); } diff --git a/onnxruntime/test/onnx/heap_buffer.h b/onnxruntime/test/onnx/heap_buffer.h index 3726bfe880d0a..f888818ef1f4c 100644 --- a/onnxruntime/test/onnx/heap_buffer.h +++ b/onnxruntime/test/onnx/heap_buffer.h @@ -4,9 +4,6 @@ #pragma once #include #include -namespace onnxruntime { -struct OrtCallback; -} /** * A holder for delay freed buffers @@ -23,9 +20,7 @@ class HeapBuffer { buffers_.push_back(p); return p; } - void AddDeleter(onnxruntime::OrtCallback* d); private: - std::vector deleters_; std::vector buffers_; }; \ No newline at end of file diff --git a/onnxruntime/test/perftest/mem_buffer.h b/onnxruntime/test/onnx/mem_buffer.h similarity index 72% rename from onnxruntime/test/perftest/mem_buffer.h rename to onnxruntime/test/onnx/mem_buffer.h index d5b7a03de7a7f..fe2a393852756 100644 --- a/onnxruntime/test/perftest/mem_buffer.h +++ b/onnxruntime/test/onnx/mem_buffer.h @@ -1,8 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #pragma once #include "core/common/common.h" namespace onnxruntime { -namespace perftest { +namespace test { +/** + * A simple POD for using with tensor deserialization + */ class MemBuffer { public: MemBuffer(void* buffer, size_t len, const OrtAllocatorInfo& alloc_info) @@ -17,5 +23,5 @@ class MemBuffer { const size_t len_; const OrtAllocatorInfo& alloc_info_; }; -} // namespace perftest +}; // namespace test } // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/onnx/tensorprotoutils.cc b/onnxruntime/test/onnx/tensorprotoutils.cc index d574f3ec76c3f..ccff84e7841c3 100644 --- a/onnxruntime/test/onnx/tensorprotoutils.cc +++ b/onnxruntime/test/onnx/tensorprotoutils.cc @@ -7,22 +7,13 @@ #include #include #include - -#include "core/common/logging/logging.h" -#include "core/graph/onnx_protobuf.h" -#include "core/framework/op_kernel.h" -#include "core/framework/tensor.h" -#include "core/framework/ort_value_pattern_planner.h" -#include "core/framework/allocator.h" -#include "core/framework/callback.h" #include "core/framework/data_types.h" -#include "core/framework/path_lib.h" - -using namespace ONNX_NAMESPACE; -using namespace ::onnxruntime::common; - -namespace { +#include "core/framework/allocator.h" +#include "core/session/onnxruntime_cxx_api.h" +#include "core/graph/onnx_protobuf.h" +namespace onnxruntime { +namespace test { #ifdef __GNUC__ constexpr inline bool IsLittleEndianOrder() noexcept { return __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; } #else @@ -34,7 +25,38 @@ inline bool IsLittleEndianOrder() noexcept { } #endif -std::vector GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto) { +//From core common +inline void MakeStringInternal(std::ostringstream& /*ss*/) noexcept { +} + +template +inline void MakeStringInternal(std::ostringstream& ss, const T& t) noexcept { + ss << t; +} + +template +inline void MakeStringInternal(std::ostringstream& ss, const T& t, const Args&... args) noexcept { + ::onnxruntime::MakeStringInternal(ss, t); + ::onnxruntime::MakeStringInternal(ss, args...); +} + +template +std::string MakeString(const Args&... args) { + std::ostringstream ss; + ::onnxruntime::MakeStringInternal(ss, args...); + return std::string(ss.str()); +} + +// Specializations for already-a-string types. +template <> +inline std::string MakeString(const std::string& str) { + return str; +} +inline std::string MakeString(const char* p_str) { + return p_str; +} + +std::vector GetTensorShapeFromTensorProto(const onnx::TensorProto& tensor_proto) { const auto& dims = tensor_proto.dims(); std::vector tensor_shape_vec(static_cast(dims.size())); for (int i = 0; i < dims.size(); ++i) { @@ -46,8 +68,8 @@ std::vector GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorP // This function doesn't support string tensors template -static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size, - /*out*/ T* p_data) { +static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size, + /*out*/ T* p_data) { // allow this low level routine to be somewhat unsafe. 
  // assuming it's thoroughly tested and valid
   GSL_SUPPRESS(type)  // type.1 reinterpret-cast; type.4 C-style casts; type.5 'T result;' is uninitialized;
   GSL_SUPPRESS(bounds .1)  // pointer arithmetic
@@ -55,12 +77,12 @@ static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_leng
   {
     size_t expected_size_in_bytes;
     if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) {
-      return Status(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::INVALID_ARGUMENT, "size overflow");
+      throw Ort::Exception("size overflow", OrtErrorCode::ORT_FAIL);
     }
     if (raw_data_length != expected_size_in_bytes)
-      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                             "UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
-                             expected_size_in_bytes, ", got ", raw_data_length);
+      throw Ort::Exception(MakeString("UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
+                                      expected_size_in_bytes, ", got ", raw_data_length),
+                           OrtErrorCode::ORT_FAIL);
     if (IsLittleEndianOrder()) {
       memcpy(p_data, raw_data, raw_data_length);
     } else {
@@ -75,85 +97,81 @@ static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_leng
       p_data[i] = result;
     }
   }
-    return Status::OK();
   }
 }
-}  // namespace
-
-namespace onnxruntime {
-namespace test {
-namespace utils {
 // This macro doesn't work for Float16/bool/string tensors
-#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                                \
-  template <>                                                                                                \
-  Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,  \
-                      /*out*/ T* p_data, int64_t expected_size) {                                            \
-    if (nullptr == p_data) {                                                                                 \
-      const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size();                          \
-      if (size == 0) return Status::OK();                                                                    \
-      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                          \
-    }                                                                                                        \
-    if (nullptr == p_data || Type != tensor.data_type()) {                                                   \
-      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                          \
-    }                                                                                                        \
-    if (raw_data != nullptr) {                                                                               \
-      return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);                         \
-    }                                                                                                        \
-    if (tensor.field_size() != expected_size)                                                                \
-      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "corrupted protobuf data: tensor shape size(", expected_size,  \
-                             ") does not match the data size(", tensor.field_size(), ") in proto");          \
-    auto& data = tensor.field_name();                                                                        \
-    for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter)                              \
-      *p_data++ = *reinterpret_cast(data_iter);                                                              \
-    return Status::OK();                                                                                     \
+#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                                \
+  template <>                                                                                                \
+  void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,              \
+                    /*out*/ T* p_data, int64_t expected_size) {                                              \
+    if (nullptr == p_data) {                                                                                 \
+      const size_t size = raw_data != nullptr ?
raw_data_len : tensor.field_size(); \ + if (size == 0) return; \ + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ + } \ + if (nullptr == p_data || Type != tensor.data_type()) { \ + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ + } \ + if (raw_data != nullptr) { \ + UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); \ + return; \ + } \ + if (tensor.field_size() != expected_size) \ + throw Ort::Exception(MakeString("corrupted protobuf data: tensor shape size(", expected_size, \ + ") does not match the data size(", tensor.field_size(), ") in proto"), \ + OrtErrorCode::ORT_FAIL); \ + auto& data = tensor.field_name(); \ + for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ + *p_data++ = *reinterpret_cast(data_iter); \ + return; \ } // TODO: complex64 complex128 -DEFINE_UNPACK_TENSOR(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size) -DEFINE_UNPACK_TENSOR(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size); -DEFINE_UNPACK_TENSOR(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size) -DEFINE_UNPACK_TENSOR(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) -DEFINE_UNPACK_TENSOR(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) +DEFINE_UNPACK_TENSOR(float, onnx::TensorProto_DataType_FLOAT, float_data, float_data_size) +DEFINE_UNPACK_TENSOR(double, onnx::TensorProto_DataType_DOUBLE, double_data, double_data_size); +DEFINE_UNPACK_TENSOR(uint8_t, onnx::TensorProto_DataType_UINT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int8_t, onnx::TensorProto_DataType_INT8, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int16_t, onnx::TensorProto_DataType_INT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(uint16_t, onnx::TensorProto_DataType_UINT16, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int32_t, onnx::TensorProto_DataType_INT32, int32_data, int32_data_size) +DEFINE_UNPACK_TENSOR(int64_t, onnx::TensorProto_DataType_INT64, int64_data, int64_data_size) +DEFINE_UNPACK_TENSOR(uint64_t, onnx::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) +DEFINE_UNPACK_TENSOR(uint32_t, onnx::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) // doesn't support raw data template <> -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, - /*out*/ std::string* p_data, int64_t expected_size) { +void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, + /*out*/ std::string* p_data, int64_t expected_size) { if (nullptr == p_data) { - if (tensor.string_data_size() == 0) return Status::OK(); - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (tensor.string_data_size() == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } - if (ONNX_NAMESPACE::TensorProto_DataType_STRING != tensor.data_type()) { - 
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (onnx::TensorProto_DataType_STRING != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } if (tensor.string_data_size() != expected_size) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, - "UnpackTensor: the pre-allocate size does not match the size in proto"); + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); auto& string_data = tensor.string_data(); for (const auto& iter : string_data) { *p_data++ = iter; } - return Status::OK(); + return; } template <> -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ bool* p_data, int64_t expected_size) { +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ bool* p_data, int64_t expected_size) { if (nullptr == p_data) { const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size(); - if (size == 0) return Status::OK(); - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (size == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } - if (ONNX_NAMESPACE::TensorProto_DataType_BOOL != tensor.data_type()) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (onnx::TensorProto_DataType_BOOL != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } if (raw_data != nullptr) { @@ -161,24 +179,24 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d } if (tensor.int32_data_size() != expected_size) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, - "UnpackTensor: the pre-allocate size does not match the size in proto"); + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); for (int iter : tensor.int32_data()) { *p_data++ = static_cast(iter); } - return Status::OK(); + return; } template <> -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ MLFloat16* p_data, int64_t expected_size) { +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ MLFloat16* p_data, int64_t expected_size) { if (nullptr == p_data) { const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); - if (size == 0) return Status::OK(); - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (size == 0) return; + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } - if (ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 != tensor.data_type()) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (onnx::TensorProto_DataType_FLOAT16 != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } if (raw_data != nullptr) { @@ -186,33 +204,34 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d } if (tensor.int32_data_size() != expected_size) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, - "UnpackTensor: the pre-allocate size does not match the size in proto"); + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); constexpr int max_value = std::numeric_limits::max(); for (int i = 0; i < static_cast(expected_size); i++) { int v = tensor.int32_data()[i]; if (v < 0 || v > max_value) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "data overflow"); + throw Ort::Exception( + "data overflow", OrtErrorCode::ORT_FAIL); } p_data[i] = MLFloat16(static_cast(v)); } - return Status::OK(); + return; } template <> -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ BFloat16* p_data, int64_t expected_size) { +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ BFloat16* p_data, int64_t expected_size) { if (nullptr == p_data) { const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size(); if (size == 0) - return Status::OK(); + return; - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } - if (ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16 != tensor.data_type()) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); + if (onnx::TensorProto_DataType_BFLOAT16 != tensor.data_type()) { + throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); } if (raw_data != nullptr) { @@ -220,38 +239,39 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d } if (tensor.int32_data_size() != expected_size) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, - "UnpackTensor: the pre-allocate size does not match the size in proto"); + throw Ort::Exception( + "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); constexpr int max_value = std::numeric_limits::max(); for (int i = 0; i < static_cast(expected_size); i++) { int v = tensor.int32_data()[i]; if (v < 0 || v > max_value) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "data overflow"); + throw Ort::Exception( + "data overflow", OrtErrorCode::ORT_FAIL); } p_data[i] = BFloat16(static_cast(v)); } - return Status::OK(); + return; } -#define CASE_PROTO_TRACE(X, Y) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ - if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ - return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); \ - } \ +#define CASE_PROTO_TRACE(X, Y) \ + case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ + if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ + throw 
Ort::Exception("Invalid TensorProto", OrtErrorCode::ORT_FAIL); \ + } \ break; template -common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out) { +Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out) { const auto& dims = tensor_proto.dims(); size_t size = 1; for (google::protobuf::int64 dim : dims) { if (dim < 0 || static_cast(dim) >= std::numeric_limits::max()) { - return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); } if (!IAllocator::CalcMemSizeForArray(size, static_cast(dim), &size)) { - return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); } } switch (tensor_proto.data_type()) { @@ -270,164 +290,102 @@ common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& CASE_PROTO_TRACE(BFLOAT16, BFloat16); CASE_PROTO_TRACE(STRING, std::string); default: - return common::Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED); + return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED); } return Status::OK(); } -TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShapeProto& tensor_shape_proto) { - const auto& dims = tensor_shape_proto.dim(); - std::vector tensor_shape_vec(static_cast(dims.size())); - for (int i = 0; i < dims.size(); ++i) { - tensor_shape_vec[i] = dims[i].has_dim_value() ? dims[i].dim_value() - : -1; /* symbolic dimensions are represented as -1 in onnxruntime*/ - } - return TensorShape(std::move(tensor_shape_vec)); -} - struct UnInitializeParam { void* preallocated; size_t preallocated_size; ONNXTensorElementDataType ele_type; }; -// In the future, we may make these two function as public C API -/** - * Initialize a buffer for being used with the OrtCreateTensorWithDataAsOrtValue function - * - */ -ORT_API_STATUS(OrtInitializeBufferForTensor, _In_opt_ void* input, size_t input_len, - enum ONNXTensorElementDataType type); - -/** - * Uninitialize the buffer that was initialized by the OrtInitializeBufferForTensor function - * - */ -ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type); - -static void UnInitTensor(void* param) noexcept { - UnInitializeParam* p = reinterpret_cast(param); - OrtUninitializeBuffer(p->preallocated, p->preallocated_size, p->ele_type); - delete p; -} - -ORT_API_STATUS_IMPL(OrtInitializeBufferForTensor, _In_opt_ void* input, size_t input_len, - enum ONNXTensorElementDataType type) { +void OrtInitializeBufferForTensor(void* input, size_t input_len, + ONNXTensorElementDataType type) { try { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return nullptr; + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; size_t tensor_size = input_len / sizeof(std::string); std::string* ptr = reinterpret_cast(input); for (size_t i = 0, n = tensor_size; i < n; ++i) { new (ptr + i) std::string(); } } catch (std::exception& ex) { - return OrtCreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); - } - return nullptr; -} - -ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type) { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; - size_t tensor_size = input_len / sizeof(std::string); - std::string* ptr 
= reinterpret_cast(input); - using std::string; - for (size_t i = 0, n = tensor_size; i < n; ++i) { - ptr[i].~string(); + throw Ort::Exception(ex.what(), OrtErrorCode::ORT_RUNTIME_EXCEPTION); } + return; } -#define CASE_PROTO(X, Y) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ - ORT_RETURN_IF_ERROR( \ - ::onnxruntime::test::utils::UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, tensor_size)); \ +#define CASE_PROTO(X, Y) \ + case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ + ::onnxruntime::test::UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, tensor_size); \ break; -class AutoDelete { - public: - OrtCallback d{nullptr, nullptr}; - AutoDelete() = default; - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(AutoDelete); - ~AutoDelete() { - if (d.f != nullptr) { - d.f(d.param); - } +#define CASE_TYPE(X) \ + case onnx::TensorProto_DataType_##X: \ + return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X; + +ONNXTensorElementDataType CApiElementTypeFromProtoType(int type) { + switch (type) { + CASE_TYPE(FLOAT) + CASE_TYPE(UINT8) + CASE_TYPE(INT8) + CASE_TYPE(UINT16) + CASE_TYPE(INT16) + CASE_TYPE(INT32) + CASE_TYPE(INT64) + CASE_TYPE(STRING) + CASE_TYPE(BOOL) + CASE_TYPE(FLOAT16) + CASE_TYPE(DOUBLE) + CASE_TYPE(UINT32) + CASE_TYPE(UINT64) + CASE_TYPE(COMPLEX64) + CASE_TYPE(COMPLEX128) + CASE_TYPE(BFLOAT16) + default: + return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; } -}; +} -static void MoveOrtCallback(OrtCallback& from, OrtCallback& to) { - to.f = from.f; - to.param = from.param; - from.f = nullptr; - from.param = nullptr; +ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto) { + return CApiElementTypeFromProtoType(tensor_proto.data_type()); } -Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer& m, OrtValue& value, - OrtCallback& deleter) { +Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuffer& m, Ort::Value& value) { const OrtAllocatorInfo& allocator = m.GetAllocInfo(); - ONNXTensorElementDataType ele_type = onnxruntime::test::utils::GetTensorElementType(tensor_proto); - deleter.f = nullptr; - deleter.param = nullptr; + ONNXTensorElementDataType ele_type = test::GetTensorElementType(tensor_proto); const void* raw_data = nullptr; size_t raw_data_len = 0; - const DataTypeImpl* const type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType(); - AutoDelete deleter_for_file_data; void* tensor_data; { - if (tensor_proto.data_location() == TensorProto_DataLocation_EXTERNAL) { - if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor can not have raw data"); - - std::unique_ptr external_data_info; - ORT_RETURN_IF_ERROR(ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info)); - std::basic_string full_path; - if (tensor_proto_path != nullptr) { - ORT_RETURN_IF_ERROR(GetDirNameFromFilePath(tensor_proto_path, full_path)); - full_path = ConcatPathComponent(full_path, external_data_info->GetRelPath()); - } else { - full_path = external_data_info->GetRelPath(); - } - raw_data_len = external_data_info->GetLength(); - // load the file - { - void* file_data; - ORT_RETURN_IF_ERROR(env.ReadFileAsString(full_path.c_str(), external_data_info->GetOffset(), - file_data, raw_data_len, deleter_for_file_data.d)); - raw_data = file_data; - } + if (tensor_proto.data_location() == 
onnx::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) { + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Server doesn't support external data."); } else if (tensor_proto.has_raw_data()) { if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor can not have raw data"); + return Status(common::ONNXRUNTIME, common::FAIL, "String tensor cannot have raw data."); raw_data = tensor_proto.raw_data().data(); raw_data_len = tensor_proto.raw_data().size(); } - if (IsLittleEndianOrder() && raw_data != nullptr && deleter_for_file_data.d.f != nullptr) { - tensor_data = const_cast(raw_data); - MoveOrtCallback(deleter_for_file_data.d, deleter); - } else { + { void* preallocated = m.GetBuffer(); size_t preallocated_size = m.GetLen(); int64_t tensor_size = 1; { for (auto i : tensor_proto.dims()) { - if (i < 0) return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "tensor can't contain negative dims"); + if (i < 0) return Status(common::ONNXRUNTIME, common::FAIL, "Tensor can't contain negative dims"); tensor_size *= i; } } // tensor_size could be zero. see test_slice_start_out_of_bounds\test_data_set_0\output_0.pb if (static_cast(tensor_size) > SIZE_MAX) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "size overflow"); + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Size overflow"); } size_t size_to_allocate; - if (!IAllocator::CalcMemSizeForArrayWithAlignment<0>(static_cast(tensor_size), type->Size(), - &size_to_allocate)) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "size overflow"); - } + GetSizeInBytesFromTensorProto<0>(tensor_proto, &size_to_allocate); if (preallocated && preallocated_size < size_to_allocate) - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "The buffer planner is not consistent with tensor buffer size, expected ", - size_to_allocate, ", got ", preallocated_size); + return Status(common::ONNXRUNTIME, common::FAIL, MakeString("The buffer planner is not consistent with tensor buffer size, expected ", size_to_allocate, ", got ", preallocated_size)); switch (tensor_proto.data_type()) { CASE_PROTO(FLOAT, float); CASE_PROTO(DOUBLE, double); @@ -442,24 +400,17 @@ Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path, CASE_PROTO(UINT64, uint64_t); CASE_PROTO(FLOAT16, MLFloat16); CASE_PROTO(BFLOAT16, BFloat16); - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_STRING: + case onnx::TensorProto_DataType::TensorProto_DataType_STRING: if (preallocated != nullptr) { - OrtStatus* status = OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); - if (status != nullptr) { - OrtReleaseStatus(status); - return Status(common::ONNXRUNTIME, common::FAIL, "initialize preallocated buffer failed"); - } - - deleter.f = UnInitTensor; - deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type}; + OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); } - ORT_RETURN_IF_ERROR(::onnxruntime::test::utils::UnpackTensor(tensor_proto, raw_data, raw_data_len, - (std::string*)preallocated, tensor_size)); + ::onnxruntime::test::UnpackTensor(tensor_proto, raw_data, raw_data_len, + (std::string*)preallocated, tensor_size); break; default: { std::ostringstream ostr; ostr << "Initialized tensor with unexpected type: " << tensor_proto.data_type(); - return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str()); + return 
Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str()); } } tensor_data = preallocated; @@ -467,105 +418,11 @@ Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path, } std::vector tensor_shape_vec = GetTensorShapeFromTensorProto(tensor_proto); // Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1). - TensorShape tensor_shape{tensor_shape_vec}; - value.Init(new Tensor(type, tensor_shape, tensor_data, allocator), DataTypeImpl::GetType(), - DataTypeImpl::GetType()->GetDeleteFunc()); + value = Ort::Value::CreateTensor(&allocator, tensor_data, m.GetLen(), tensor_shape_vec.data(), tensor_shape_vec.size(), (ONNXTensorElementDataType)tensor_proto.data_type()); return Status::OK(); } - -#define CASE_TYPE(X) \ - case ONNX_NAMESPACE::TensorProto_DataType_##X: \ - return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X; - -ONNXTensorElementDataType CApiElementTypeFromProtoType(int type) { - switch (type) { - CASE_TYPE(FLOAT) - CASE_TYPE(UINT8) - CASE_TYPE(INT8) - CASE_TYPE(UINT16) - CASE_TYPE(INT16) - CASE_TYPE(INT32) - CASE_TYPE(INT64) - CASE_TYPE(STRING) - CASE_TYPE(BOOL) - CASE_TYPE(FLOAT16) - CASE_TYPE(DOUBLE) - CASE_TYPE(UINT32) - CASE_TYPE(UINT64) - CASE_TYPE(COMPLEX64) - CASE_TYPE(COMPLEX128) - CASE_TYPE(BFLOAT16) - default: - return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; - } -} - -ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto& tensor_proto) { - return CApiElementTypeFromProtoType(tensor_proto.data_type()); -} - -TensorProto::DataType GetTensorProtoType(const Tensor& tensor) { - auto tensor_type = tensor.DataType(); - TensorProto::DataType dtype = TensorProto_DataType_UNDEFINED; - - if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_FLOAT; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_DOUBLE; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_INT8; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_INT16; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_INT32; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_INT64; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_UINT8; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_UINT16; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_UINT32; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_UINT64; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_BOOL; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_FLOAT16; - else if (tensor_type == DataTypeImpl::GetType()) - dtype = TensorProto_DataType_BFLOAT16; - - return dtype; -} - -ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name, - const ONNX_NAMESPACE::TypeProto& tensor_proto_type) { - // Given we are using the raw_data field in the protobuf, this will work only for little-endian format. - ORT_ENFORCE(IsLittleEndianOrder()); - - // Set name, dimensions, type, and data of the TensorProto. 
- ONNX_NAMESPACE::TensorProto tensor_proto; - - tensor_proto.set_name(tensor_proto_name); - - for (auto& dim : tensor.Shape().GetDims()) { - tensor_proto.add_dims(dim); - } - - // TODO Once utils::GetTensorProtoType supports all data types, you can get the tensor proto type from the tensor, - // as follows (which will allow us to get rid of the tensor_proto_type argument). - //tensor_proto.set_data_type(utils::GetTensorProtoType(tensor)); - - tensor_proto.set_data_type(tensor_proto_type.tensor_type().elem_type()); - - tensor_proto.set_raw_data(tensor.DataRaw(), tensor.SizeInBytes()); - - return tensor_proto; -} - -template common::Status GetSizeInBytesFromTensorProto<256>(const ONNX_NAMESPACE::TensorProto& tensor_proto, - size_t* out); -template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); -} // namespace utils +template Status GetSizeInBytesFromTensorProto<256>(const onnx::TensorProto& tensor_proto, + size_t* out); +template Status GetSizeInBytesFromTensorProto<0>(const onnx::TensorProto& tensor_proto, size_t* out); } // namespace test -} // namespace onnxruntime +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/onnx/tensorprotoutils.h b/onnxruntime/test/onnx/tensorprotoutils.h index 18c171d4d3cab..c44d2e2132992 100644 --- a/onnxruntime/test/onnx/tensorprotoutils.h +++ b/onnxruntime/test/onnx/tensorprotoutils.h @@ -5,64 +5,34 @@ #include #include - -#include "core/common/common.h" -#include "core/common/status.h" -#include "core/framework/allocator.h" -#include "core/framework/ml_value.h" -#include "core/framework/mem_buffer.h" -#include "core/framework/tensor_external_data_info.h" #include "core/session/onnxruntime_c_api.h" -#include "core/graph/onnx_protobuf.h" -#include "core/platform/env.h" +#include "core/session/onnxruntime_cxx_api.h" + +#include "mem_buffer.h" -namespace ONNX_NAMESPACE { +namespace onnx { class TensorProto; -class TensorShapeProto; -} // namespace ONNX_NAMESPACE +} namespace onnxruntime { -class Tensor; namespace test { -namespace utils { -TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShapeProto& tensor_shape_proto); -/** - * deserialize a TensorProto into a preallocated memory buffer. - * \param tensor_proto_path A local file path of where the 'input' was loaded from. Can be NULL if the tensor proto doesn't - * have any external data or it was loaded from current working dir. This path could be either a - * relative path or an absolute path. - */ -common::Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path, - const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value, - OrtCallback& deleter); -// This function doesn't support string tensors -ONNX_NAMESPACE::TensorProto::DataType GetTensorProtoType(const Tensor& tensor); - -/** Creates a TensorProto from a Tensor. - @param[in] tensor the Tensor whose data and shape will be used to create the TensorProto. - @param[in] tensor_proto_name the name of the TensorProto. - @param[in] tensor_proto_type the type of the TensorProto. - @return the TensorProto. - - Note: Method currently requires that data is in little-endian format. - TODO Once the GetTensorProtoType supports all data types, we can remove the tensor_proto_type parameter and - instead get the type from the tensor. 
*/ -ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name, - const ONNX_NAMESPACE::TypeProto& tensor_proto_type); - -ONNXTensorElementDataType CApiElementTypeFromProtoType(int type); -ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto& tensor_proto); - // How much memory it will need for putting the content of this tensor into a plain array // complex64/complex128 tensors are not supported. // The output value could be zero or -1. template -common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); +common::Status GetSizeInBytesFromTensorProto(const onnx::TensorProto& tensor_proto, size_t* out); +/** + * deserialize a TensorProto into a preallocated memory buffer. + * Impl must correspond to onnxruntime/core/framework/tensorprotoutils.cc + * This implementation does not support external data so as to reduce dependency surface. + */ +common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuffer& m, /* out */ Ort::Value& value); template -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ T* p_data, int64_t expected_size); +void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, + /*out*/ T* p_data, int64_t expected_size); -} // namespace utils +ONNXTensorElementDataType CApiElementTypeFromProtoType(int type); +ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto); } // namespace test -} // namespace onnxruntime +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/perftest/TestCase.cc b/onnxruntime/test/perftest/TestCase.cc deleted file mode 100644 index 238530172fac5..0000000000000 --- a/onnxruntime/test/perftest/TestCase.cc +++ /dev/null @@ -1,636 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
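From here on, the patch removes the old perftest copies of these files wholesale; their replacements live under test/onnx as shown above. For reference, the UnpackTensor entry point declared in the new header is driven roughly as follows; a sketch, with made-up proto values for illustration:

```cpp
#include "tensorprotoutils.h"

void UnpackExample() {
  onnx::TensorProto t;
  t.set_data_type(onnx::TensorProto_DataType_FLOAT);
  t.add_dims(2);
  t.add_float_data(1.0f);
  t.add_float_data(2.0f);

  float out[2];
  // No raw_data is supplied, so the values come from float_data(); a type or
  // size mismatch now surfaces as Ort::Exception instead of a returned Status.
  onnxruntime::test::UnpackTensor(t, /*raw_data=*/nullptr, /*raw_data_len=*/0, out, /*expected_size=*/2);
}
```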
- -// needs to be included first to get around onnxruntime\cmake\external\onnx\onnx/common/constants.h(14): error C2513: 'bool': no variable declared before '=' -#include "tensorprotoutils.h" - -#include "TestCase.h" -#include -#include -#include "core/common/logging/logging.h" -#include "core/common/common.h" -#include "core/platform/env.h" -#include "core/platform/ort_mutex.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/framework/path_lib.h" -#include "core/framework/allocator.h" -#include -#include -#include -#include "OrtValueList.h" - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wignored-qualifiers" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#else -#pragma warning(push) -#pragma warning(disable : 4018) /*'expression' : signed/unsigned mismatch */ -#pragma warning(disable : 4065) /*switch statement contains 'default' but no 'case' labels*/ -#pragma warning(disable : 4100) -#pragma warning(disable : 4505) -#pragma warning(disable : 4146) /*unary minus operator applied to unsigned type, result still unsigned*/ -#pragma warning(disable : 4244) /*'conversion' conversion from 'type1' to 'type2', possible loss of data*/ -#pragma warning(disable : 4251) /*'identifier' : class 'type' needs to have dll-interface to be used by clients of class 'type2'*/ -#pragma warning(disable : 4267) /*'var' : conversion from 'size_t' to 'type', possible loss of data*/ -#pragma warning(disable : 4305) /*'identifier' : truncation from 'type1' to 'type2'*/ -#pragma warning(disable : 4307) /*'operator' : integral constant overflow*/ -#pragma warning(disable : 4309) /*'conversion' : truncation of constant value*/ -#pragma warning(disable : 4334) /*'operator' : result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?)*/ -#pragma warning(disable : 4355) /*'this' : used in base member initializer list*/ -#pragma warning(disable : 4506) /*no definition for inline function 'function'*/ -#pragma warning(disable : 4800) /*'type' : forcing value to bool 'true' or 'false' (performance warning)*/ -#pragma warning(disable : 4996) /*The compiler encountered a deprecated declaration.*/ -#endif -#include -#include "tml.pb.h" -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#else -#pragma warning(pop) -#endif - -using namespace onnxruntime; -using namespace onnxruntime::common; -using google::protobuf::RepeatedPtrField; - -using ORT_VALUE_HOLDER = std::unique_ptr; - -const std::string TestModelInfo::unknown_version = "unknown version"; - -namespace { -template -ONNXTensorElementDataType NumericTypeToONNXType(); -template <> -ONNXTensorElementDataType NumericTypeToONNXType() { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; -} - -template <> -ONNXTensorElementDataType NumericTypeToONNXType() { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; -} - -template <> -ONNXTensorElementDataType NumericTypeToONNXType() { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; -} - -template <> -ONNXTensorElementDataType NumericTypeToONNXType() { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING; -} - -template -OrtValue* CreateTensorWithDataAsOrtValue(OrtAllocatorInfo* info, std::vector& input) { - std::vector dims(1, input.size()); - OrtValue* ret = nullptr; - ORT_THROW_ON_ERROR(::OrtCreateTensorWithDataAsOrtValue(info, input.data(), input.size() * sizeof(T), dims.data(), - dims.size(), NumericTypeToONNXType(), &ret)); - return ret; -} - -template -OrtValue* PbMapToOrtValue(const google::protobuf::Map& map) { - OrtAllocatorInfo* info; - 
ORT_THROW_ON_ERROR(OrtCreateCpuAllocatorInfo(OrtDeviceAllocator, OrtMemTypeDefault, &info)); - std::unique_ptr rel_info(info, OrtReleaseAllocatorInfo); - const size_t ele_count = map.size(); - std::vector dims(1, ele_count); - std::vector keys(ele_count); - std::vector values(ele_count); - size_t i = 0; - for (auto& kvp : map) { - keys[i] = kvp.first; - values[i] = kvp.second; - ++i; - } - OrtValueArray map_in(2); - OrtValue* p = CreateTensorWithDataAsOrtValue(info, keys); - if (p == nullptr) ORT_THROW("Create keys tensor failed"); - map_in.Set(0, p); - - p = CreateTensorWithDataAsOrtValue(info, values); - if (p == nullptr) ORT_THROW("Create values tensor failed"); - map_in.Set(1, p); - - // create map ort value - OrtValue* map_ort = nullptr; - ORT_THROW_ON_ERROR(OrtCreateValue(map_in.Data(), map_in.Length(), ONNX_TYPE_MAP, &map_ort)); - return map_ort; -} - -template -void VectorProtoToOrtValue(const RepeatedPtrField& input, ORT_VALUE_HOLDER& output) { - OrtAllocatorInfo* info; - ORT_THROW_ON_ERROR(OrtCreateCpuAllocatorInfo(OrtDeviceAllocator, OrtMemTypeDefault, &info)); - std::unique_ptr rel_info(info, OrtReleaseAllocatorInfo); - OrtValueArray in(input.size()); - size_t j = 0; - for (const T& v : input) { - // create key tensor - const auto& map = v.v(); - size_t ele_count = map.size(); - using key_type = typename std::remove_reference::type::key_type; - using value_type = typename std::remove_reference::type::mapped_type; - std::vector dims(1, static_cast(ele_count)); - std::vector keys(ele_count); - std::vector values(ele_count); - size_t i = 0; - for (auto& kvp : map) { - keys[i] = kvp.first; - values[i] = kvp.second; - ++i; - } - OrtValueArray map_in(2); - OrtValue* p = CreateTensorWithDataAsOrtValue(info, keys); - if (p == nullptr) ORT_THROW("Create keys tensor failed"); - map_in.Set(0, p); - - p = CreateTensorWithDataAsOrtValue(info, values); - if (p == nullptr) ORT_THROW("Create values tensor failed"); - map_in.Set(1, p); - - // create map ort value - OrtValue* map_ort = nullptr; - ORT_THROW_ON_ERROR(OrtCreateValue(map_in.Data(), map_in.Length(), ONNX_TYPE_MAP, &map_ort)); - in.Set(j++, map_ort); - } - OrtValue* seq_ort = nullptr; - ORT_THROW_ON_ERROR(OrtCreateValue(in.Data(), in.Length(), ONNX_TYPE_SEQUENCE, &seq_ort)); - output.reset(seq_ort); -} - -template -static int ExtractFileNo(const std::basic_string& name) { - size_t p1 = name.rfind('.'); - size_t p2 = name.rfind('_', p1); - ++p2; - std::basic_string number_str = name.substr(p2, p1 - p2); - const CHAR_T* start = number_str.c_str(); - const CHAR_T* end = number_str.c_str(); - long ret = OrtStrtol(start, const_cast(&end)); - if (end == start) { - ORT_THROW("parse file name failed"); - } - return static_cast(ret); -} -using PATH_STRING_TYPE = std::basic_string; - -class OnnxModelInfo : public TestModelInfo { - private: - std::string node_name_; - std::string onnx_commit_tag_; - std::vector input_value_info_; - std::vector output_value_info_; - - template - static void RepeatedPtrFieldToVector(const ::google::protobuf::RepeatedPtrField& input_value_info, - std::vector& out) { - for (int i = 0; i != input_value_info.size(); ++i) { - out.push_back(input_value_info[i]); - } - } - const std::basic_string model_url_; - - public: - OnnxModelInfo(_In_ const PATH_CHAR_TYPE* model_url) : model_url_(model_url) { - // parse model - int model_fd; - auto st = Env::Default().FileOpenRd(model_url, model_fd); - if (!st.IsOK()) { - ORT_THROW(st.ErrorMessage()); - } - google::protobuf::io::FileInputStream f(model_fd); - 
f.SetCloseOnDelete(true); - ONNX_NAMESPACE::ModelProto model_pb; - if (!model_pb.ParseFromZeroCopyStream(&f)) { - ORT_THROW("Failed to load model because protobuf parsing failed."); - } -#ifdef __GNUG__ - std::smatch match; - std::string url_string{model_url}; - const std::regex onnx_tag_regex("onnx[0-9a-z]{3}"); //e.g. onnx141, onnx150, onnxtip - if (std::regex_search(url_string, match, onnx_tag_regex)) { - onnx_commit_tag_ = match[0].str(); - } else { - onnx_commit_tag_ = TestModelInfo::unknown_version; - } -#endif - const ONNX_NAMESPACE::GraphProto& graph = model_pb.graph(); - if (graph.node().size() == 1) { - node_name_ = graph.node()[0].op_type(); - } - std::unordered_set initializer_names; - for (const auto& init : graph.initializer()) { - if (!init.has_name()) continue; - initializer_names.insert(init.name()); - } - for (const auto& p : graph.input()) { - if (!p.has_name()) ORT_THROW("input without name??"); - if (initializer_names.find(p.name()) == initializer_names.end()) input_value_info_.push_back(p); - } - RepeatedPtrFieldToVector(graph.output(), output_value_info_); - } - - const PATH_CHAR_TYPE* GetModelUrl() const override { return model_url_.c_str(); } - std::string GetModelVersion() const override { return onnx_commit_tag_; } - - const std::string& GetNodeName() const override { return node_name_; } - const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const override { - return &output_value_info_[i]; - } - int GetInputCount() const override { return static_cast(input_value_info_.size()); } - int GetOutputCount() const override { return static_cast(output_value_info_.size()); } - const std::string& GetInputName(size_t i) const override { return input_value_info_[i].name(); } - - const std::string& GetOutputName(size_t i) const override { return output_value_info_[i].name(); } -}; - -template -static void SortTensorFileNames(std::vector>& input_pb_files) { - if (input_pb_files.size() <= 1) return; - std::sort(input_pb_files.begin(), input_pb_files.end(), - [](const std::basic_string& left, const std::basic_string& right) -> bool { - std::basic_string leftname = GetLastComponent(left); - std::basic_string rightname = GetLastComponent(right); - int left1 = ExtractFileNo(leftname); - int right1 = ExtractFileNo(rightname); - return left1 < right1; - }); - for (size_t i = 0; i != input_pb_files.size(); ++i) { - int fileno = ExtractFileNo(GetLastComponent(input_pb_files[i])); - if (static_cast(fileno) != i) { - ORT_THROW("illegal input file name:", ToMBString(input_pb_files[i])); - } - } -} - -OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, HeapBuffer& b) { - size_t len = 0; - auto status = onnxruntime::perftest::GetSizeInBytesFromTensorProto<0>(t, &len); - if (!status.IsOK()) { - ORT_THROW(status.ToString()); - } - void* p = len == 0 ? 
nullptr : b.AllocMemory(len); - Ort::Value temp_value{nullptr}; - OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); - status = onnxruntime::perftest::TensorProtoToMLValue(t, onnxruntime::perftest::MemBuffer(p, len, cpu_allocator_info), - temp_value); - if (!status.IsOK()) { - ORT_THROW(status.ToString()); - } - return temp_value.release(); -} - -void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* modelinfo, - std::unordered_map& name_data_map, HeapBuffer& b, std::ostringstream& oss) { - google::protobuf::io::FileInputStream f(test_data_pb_fd); - f.SetCloseOnDelete(true); - google::protobuf::io::CodedInputStream coded_input(&f); - bool clean_eof = false; - int item_id = 1; - for (proto::TraditionalMLData data; - google::protobuf::util::ParseDelimitedFromCodedStream(&data, &coded_input, &clean_eof); - ++item_id, data.Clear()) { - try { - ORT_VALUE_HOLDER gvalue(nullptr, OrtReleaseValue); - switch (data.values_case()) { - case proto::TraditionalMLData::kVectorMapStringToFloat: - VectorProtoToOrtValue(data.vector_map_string_to_float().v(), gvalue); - break; - case proto::TraditionalMLData::kVectorMapInt64ToFloat: - VectorProtoToOrtValue(data.vector_map_int64_to_float().v(), gvalue); - break; - case proto::TraditionalMLData::kMapStringToString: - gvalue.reset(PbMapToOrtValue(data.map_string_to_string().v())); - break; - case proto::TraditionalMLData::kMapStringToInt64: - gvalue.reset(PbMapToOrtValue(data.map_string_to_int64().v())); - break; - case proto::TraditionalMLData::kMapStringToFloat: - gvalue.reset(PbMapToOrtValue(data.map_string_to_float().v())); - break; - case proto::TraditionalMLData::kMapStringToDouble: - gvalue.reset(PbMapToOrtValue(data.map_string_to_double().v())); - break; - case proto::TraditionalMLData::kMapInt64ToString: - gvalue.reset(PbMapToOrtValue(data.map_int64_to_string().v())); - break; - case proto::TraditionalMLData::kMapInt64ToInt64: - gvalue.reset(PbMapToOrtValue(data.map_int64_to_int64().v())); - break; - case proto::TraditionalMLData::kMapInt64ToFloat: - gvalue.reset(PbMapToOrtValue(data.map_int64_to_float().v())); - break; - case proto::TraditionalMLData::kMapInt64ToDouble: - gvalue.reset(PbMapToOrtValue(data.map_int64_to_double().v())); - break; - case proto::TraditionalMLData::kTensor: { - gvalue.reset(TensorToOrtValue(data.tensor(), b)); - } break; - default: - ORT_NOT_IMPLEMENTED("unknown data type inside TraditionalMLData"); - } - if (!data.debug_info().empty()) { - oss << ":" << data.debug_info(); - } - std::string value_name = data.name(); - if (value_name.empty()) { - const size_t c = name_data_map.size(); - value_name = is_input ? 
modelinfo->GetInputName(c) : modelinfo->GetOutputName(c); - } - - auto pv = name_data_map.insert(std::make_pair(value_name, gvalue.release())); - if (!pv.second) { - ORT_THROW("duplicated test data name"); - break; - } - } catch (onnxruntime::NotImplementedException& ex) { - std::ostringstream oss2; - oss2 << "load the " << item_id << "-th item failed," << ex.what(); - ORT_NOT_IMPLEMENTED(oss2.str()); - } catch (std::exception& ex) { - std::ostringstream oss2; - oss2 << "load the " << item_id << "-th item failed," << ex.what(); - ORT_THROW(oss2.str()); - } - } - if (!clean_eof) { - ORT_THROW("parse input file failed, has extra unparsed data"); - } -} - -} // namespace - -TestModelInfo* TestModelInfo::LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url) { - return new OnnxModelInfo(model_url); -} - -/** - * test_case_dir must have contents of: - * model.onnx - * ???/input_??.pb - * ???/output_??.pb - * ???/input_??.pb - * ???/output_??.pb - */ -class OnnxTestCase : public ITestCase { - private: - std::string test_case_name_; - std::vector debuginfo_strings; - onnxruntime::OrtMutex m_; - - std::vector> test_data_dirs_; - - std::string GetDatasetDebugInfoString(size_t dataset_id) override { - std::lock_guard l(m_); - if (dataset_id < debuginfo_strings.size()) { - return debuginfo_strings[dataset_id]; - } - // return empty string - return std::string(); - } - - void ConvertTestData(const std::vector& test_data_pbs, HeapBuffer& b, bool is_input, - std::unordered_map& out); - - std::once_flag model_parsed_; - std::once_flag config_parsed_; - double per_sample_tolerance_; - double relative_per_sample_tolerance_; - bool post_processing_; - TestModelInfo* model_info_; - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTestCase); - - public: - OnnxTestCase(const std::string& test_case_name, TestModelInfo* model, double default_per_sample_tolerance, - double default_relative_per_sample_tolerance); - ~OnnxTestCase() override { delete model_info_; } - Status GetPerSampleTolerance(double* value) override; - Status GetRelativePerSampleTolerance(double* value) override; - Status GetPostProcessing(bool* value) override; - - const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const override { - return model_info_->GetOutputInfoFromModel(i); - } - - size_t GetDataCount() const override { - return test_data_dirs_.size(); - } - const std::string& GetNodeName() const override { return model_info_->GetNodeName(); } - - const PATH_CHAR_TYPE* GetModelUrl() const override { return model_info_->GetModelUrl(); } - const std::string& GetTestCaseName() const override { - return test_case_name_; - } - std::string GetTestCaseVersion() const override { - return model_info_->GetModelVersion(); - } - void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map&, bool is_input) override; -}; - -ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance) { - return new OnnxTestCase(test_case_name, model, default_per_sample_tolerance, default_relative_per_sample_tolerance); -} - -Status OnnxTestCase::GetPerSampleTolerance(double* value) { - *value = per_sample_tolerance_; - return Status::OK(); -} - -Status OnnxTestCase::GetRelativePerSampleTolerance(double* value) { - *value = relative_per_sample_tolerance_; - return Status::OK(); -} - -Status OnnxTestCase::GetPostProcessing(bool* value) { - *value = post_processing_; - return Status::OK(); -} - -static std::string trim_str(const std::string& s) { - 
std::string ltrim = std::regex_replace(s, std::regex("^\\s+"), std::string("")); - std::string result = std::regex_replace(ltrim, std::regex("\\s+$"), std::string("")); - return result; -} - -static bool read_config_file(const std::basic_string& path, std::map& fc) { - std::ifstream infile(path); - if (!infile.good()) { - return false; - } - - for (std::string line; std::getline(infile, line);) { - std::istringstream ss(line); - if (line.empty()) { - continue; - } - std::vector tokens; - for (std::string token; std::getline(ss, token, ':');) { - std::string trimmed_token = trim_str(token); - if (trimmed_token.empty()) { - continue; - } - tokens.push_back(trimmed_token); - } - fc[tokens[0]] = tokens[1]; - } - return true; -} - -//load tensors from disk -template -static void LoadTensors(const std::vector& pb_files, - std::vector* input_pbs) { - for (size_t i = 0; i != pb_files.size(); ++i) { - int tensor_fd; - auto st = Env::Default().FileOpenRd(pb_files.at(i), tensor_fd); - if (!st.IsOK()) { - ORT_THROW("open file '", ToMBString(pb_files.at(i)), "' failed:", st.ErrorMessage()); - } - google::protobuf::io::FileInputStream f(tensor_fd); - f.SetCloseOnDelete(true); - ONNX_NAMESPACE::TensorProto tensor; - if (!tensor.ParseFromZeroCopyStream(&f)) { - ORT_THROW("parse file '", ToMBString(pb_files.at(i)), "' failed"); - } - input_pbs->emplace_back(tensor); - } -} - -void OnnxTestCase::LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& name_data_map, - bool is_input) { - if (id >= test_data_dirs_.size()) { - ORT_THROW("index out of bound"); - } - - PATH_STRING_TYPE test_data_pb = ConcatPathComponent( - test_data_dirs_[id], (is_input ? ORT_TSTR("inputs.pb") : ORT_TSTR("outputs.pb"))); - int test_data_pb_fd; - auto st = Env::Default().FileOpenRd(test_data_pb, test_data_pb_fd); - if (st.IsOK()) { //has an all-in-one input file - std::ostringstream oss; - { - std::lock_guard l(m_); - oss << debuginfo_strings[id]; - } - try { - LoopDataFile(test_data_pb_fd, is_input, model_info_, name_data_map, b, oss); - } catch (std::exception& ex) { - std::ostringstream oss2; - oss2 << "parse data file \"" << ToMBString(test_data_pb) << "\" failed:" << ex.what(); - ORT_THROW(oss.str()); - } - { - std::lock_guard l(m_); - debuginfo_strings[id] = oss.str(); - } - return; - } - - std::vector test_data_pb_files; - const PATH_STRING_TYPE& dir_path = test_data_dirs_[id]; - LoopDir(dir_path, - [&test_data_pb_files, &dir_path, is_input](const PATH_CHAR_TYPE* filename, OrtFileType f_type) -> bool { - if (filename[0] == '.') return true; - if (f_type != OrtFileType::TYPE_REG) return true; - std::basic_string filename_str = filename; - if (!HasExtensionOf(filename_str, ORT_TSTR("pb"))) return true; - const std::basic_string file_prefix = - is_input ? 
ORT_TSTR("input_") : ORT_TSTR("output_"); - if (!filename_str.compare(0, file_prefix.length(), file_prefix)) { - std::basic_string p = ConcatPathComponent(dir_path, filename_str); - test_data_pb_files.push_back(p); - } - return true; - }); - SortTensorFileNames(test_data_pb_files); - - std::vector test_data_pbs; - LoadTensors(test_data_pb_files, &test_data_pbs); - ConvertTestData(test_data_pbs, b, is_input, name_data_map); -} - -void OnnxTestCase::ConvertTestData(const std::vector& test_data_pbs, HeapBuffer& b, - bool is_input, std::unordered_map& out) { - bool has_valid_names = true; - std::vector var_names(test_data_pbs.size()); - for (size_t input_index = 0; input_index != test_data_pbs.size(); ++input_index) { - std::string name = test_data_pbs[input_index].name(); - if (name.empty()) { - has_valid_names = false; - break; - } - var_names[input_index] = name; - } - if (!has_valid_names) { - size_t count = static_cast(is_input ? model_info_->GetInputCount() : model_info_->GetOutputCount()); - if (count != test_data_pbs.size()) { - ORT_THROW("data count mismatch, expect ", count, ", got ", test_data_pbs.size()); - } - for (size_t i = 0; i != count; ++i) { - var_names[i] = is_input ? model_info_->GetInputName(i) : model_info_->GetOutputName(i); - } - } - for (size_t input_index = 0; input_index != test_data_pbs.size(); ++input_index) { - std::string name = var_names[input_index]; - const ONNX_NAMESPACE::TensorProto& input = test_data_pbs[input_index]; - size_t len = 0; - - auto status = onnxruntime::perftest::GetSizeInBytesFromTensorProto<0>(input, &len); - if (!status.IsOK()) { - ORT_THROW(status.ToString()); - } - void* p = len == 0 ? nullptr : b.AllocMemory(len); - OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); - Ort::Value v1{nullptr}; - status = onnxruntime::perftest::TensorProtoToMLValue(input, onnxruntime::perftest::MemBuffer(p, len, cpu_allocator_info), - v1); - if (!status.IsOK()) { - ORT_THROW(status.ToString()); - } - out.insert(std::make_pair(name, v1.release())); - } -} - -OnnxTestCase::OnnxTestCase(const std::string& test_case_name, _In_ TestModelInfo* model, - double default_per_sample_tolerance, double default_relative_per_sample_tolerance) - : test_case_name_(test_case_name), model_info_(model) { - std::basic_string test_case_dir = model_info_->GetDir(); - - // parse config - std::basic_string config_path = - ConcatPathComponent(test_case_dir, ORT_TSTR("config.txt")); - /* Note: protobuf-lite doesn't support reading protobuf files as text-format. Config.txt is exactly that. - That's the reason I've to parse the file in a different way to read the configs. Currently - this affects 2 tests - fp16_tiny_yolov2 and fp16_inception_v1. It's not clear why we've to use protobuf - to represent simple config files that have only key-value pairs. 
- */ - std::map fc; - per_sample_tolerance_ = default_per_sample_tolerance; - relative_per_sample_tolerance_ = default_relative_per_sample_tolerance; - post_processing_ = false; - if (read_config_file(config_path, fc)) { - if (fc.count("per_sample_tolerance") > 0) { - per_sample_tolerance_ = stod(fc["per_sample_tolerance"]); - } - if (fc.count("relative_per_sample_tolerance") > 0) { - relative_per_sample_tolerance_ = stod(fc["relative_per_sample_tolerance"]); - } - if (fc.count("post_processing") > 0) { - post_processing_ = fc["post_processing"] == "true"; - } - } - - LoopDir(test_case_dir, [&test_case_dir, this](const PATH_CHAR_TYPE* filename, OrtFileType f_type) -> bool { - if (filename[0] == '.') return true; - if (f_type == OrtFileType::TYPE_DIR) { - std::basic_string p = ConcatPathComponent(test_case_dir, filename); - test_data_dirs_.push_back(p); - debuginfo_strings.push_back(ToMBString(p)); - } - return true; - }); -} diff --git a/onnxruntime/test/perftest/TestCase.h b/onnxruntime/test/perftest/TestCase.h deleted file mode 100644 index 119f1673125ad..0000000000000 --- a/onnxruntime/test/perftest/TestCase.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "heap_buffer.h" - -namespace ONNX_NAMESPACE { -class ValueInfoProto; -} - -//One test case is for one model file -//One test case can contain multiple test data(input/output pairs) -class ITestCase { - public: - virtual void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& name_data_map, - bool is_input) = 0; - virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; - virtual const std::string& GetNodeName() const = 0; - virtual const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const = 0; - - virtual const std::string& GetTestCaseName() const = 0; - virtual std::string GetTestCaseVersion() const = 0; - //a string to help identify the dataset - virtual std::string GetDatasetDebugInfoString(size_t dataset_id) = 0; - //The number of input/output pairs - virtual size_t GetDataCount() const = 0; - virtual ~ITestCase() = default; - virtual ::onnxruntime::common::Status GetPerSampleTolerance(double* value) = 0; - virtual ::onnxruntime::common::Status GetRelativePerSampleTolerance(double* value) = 0; - virtual ::onnxruntime::common::Status GetPostProcessing(bool* value) = 0; -}; - -class TestModelInfo { - public: - virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; - virtual std::basic_string GetDir() const { - std::basic_string test_case_dir; - auto st = onnxruntime::GetDirNameFromFilePath(GetModelUrl(), test_case_dir); - if (!st.IsOK()) { - ORT_THROW("GetDirNameFromFilePath failed"); - } - return test_case_dir; - } - virtual const std::string& GetNodeName() const = 0; - virtual const ONNX_NAMESPACE::ValueInfoProto* GetOutputInfoFromModel(size_t i) const = 0; - virtual int GetInputCount() const = 0; - virtual int GetOutputCount() const = 0; - virtual const std::string& GetInputName(size_t i) const = 0; - virtual const std::string& GetOutputName(size_t i) const = 0; - virtual std::string GetModelVersion() const { return ""; } - virtual ~TestModelInfo() = default; - - static TestModelInfo* LoadOnnxModel(_In_ const PATH_CHAR_TYPE* model_url); - static const std::string unknown_version; -}; - -ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, - double default_per_sample_tolerance, double 
default_relative_per_sample_tolerance); diff --git a/onnxruntime/test/perftest/heap_buffer.cc b/onnxruntime/test/perftest/heap_buffer.cc deleted file mode 100644 index 5305f684eb392..0000000000000 --- a/onnxruntime/test/perftest/heap_buffer.cc +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "heap_buffer.h" - -HeapBuffer::~HeapBuffer() { - for (void* p : buffers_) { - free(p); - } -} \ No newline at end of file diff --git a/onnxruntime/test/perftest/heap_buffer.h b/onnxruntime/test/perftest/heap_buffer.h deleted file mode 100644 index f888818ef1f4c..0000000000000 --- a/onnxruntime/test/perftest/heap_buffer.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once -#include -#include - -/** - * A holder for delay freed buffers - */ -class HeapBuffer { - public: - HeapBuffer() = default; - /** - * free all the buffers allocated from 'AllocMemory' function - */ - ~HeapBuffer(); - void* AllocMemory(size_t size) { - void* p = malloc(size); - buffers_.push_back(p); - return p; - } - - private: - std::vector buffers_; -}; \ No newline at end of file diff --git a/onnxruntime/test/perftest/tensorprotoutils.cc b/onnxruntime/test/perftest/tensorprotoutils.cc deleted file mode 100644 index 9ca9d53b5fd90..0000000000000 --- a/onnxruntime/test/perftest/tensorprotoutils.cc +++ /dev/null @@ -1,428 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "tensorprotoutils.h" - -#include -#include -#include -#include -#include "core/framework/data_types.h" -#include "core/framework/allocator.h" -#include "core/session/onnxruntime_cxx_api.h" -#include "core/graph/onnx_protobuf.h" - -namespace onnxruntime { -namespace perftest { -#ifdef __GNUC__ -constexpr inline bool IsLittleEndianOrder() noexcept { return __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; } -#else -// On Windows and Mac, this function should always return true -GSL_SUPPRESS(type .1) // allow use of reinterpret_cast for this special case -inline bool IsLittleEndianOrder() noexcept { - static int n = 1; - return (*reinterpret_cast(&n) == 1); -} -#endif - -//From core common -inline void MakeStringInternal(std::ostringstream& /*ss*/) noexcept { -} - -template -inline void MakeStringInternal(std::ostringstream& ss, const T& t) noexcept { - ss << t; -} - -template -inline void MakeStringInternal(std::ostringstream& ss, const T& t, const Args&... args) noexcept { - ::onnxruntime::MakeStringInternal(ss, t); - ::onnxruntime::MakeStringInternal(ss, args...); -} - -template -std::string MakeString(const Args&... args) { - std::ostringstream ss; - ::onnxruntime::MakeStringInternal(ss, args...); - return std::string(ss.str()); -} - -// Specializations for already-a-string types. 
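-// (They let MakeString(str) and MakeString("literal") return the value
-//  directly instead of routing it through the ostringstream path above.)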
-template <>
-inline std::string MakeString(const std::string& str) {
-  return str;
-}
-inline std::string MakeString(const char* p_str) {
-  return p_str;
-}
-
-std::vector<int64_t> GetTensorShapeFromTensorProto(const onnx::TensorProto& tensor_proto) {
-  const auto& dims = tensor_proto.dims();
-  std::vector<int64_t> tensor_shape_vec(static_cast<size_t>(dims.size()));
-  for (int i = 0; i < dims.size(); ++i) {
-    tensor_shape_vec[i] = dims[i];
-  }
-
-  return tensor_shape_vec;
-}
-
-// This function doesn't support string tensors
-template <typename T>
-static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size,
-                                    /*out*/ T* p_data) {
-  // Allow this low-level routine to be somewhat unsafe, assuming it's thoroughly tested and valid.
-  GSL_SUPPRESS(type)       // type.1 reinterpret-cast; type.4 C-style casts; type.5 'T result;' is uninitialized;
-  GSL_SUPPRESS(bounds .1)  // pointer arithmetic
-  GSL_SUPPRESS(f .23)      // buff and temp_bytes never tested for nullness and could be gsl::not_null
-  {
-    size_t expected_size_in_bytes;
-    if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) {
-      throw Ort::Exception("size overflow", OrtErrorCode::ORT_FAIL);
-    }
-    if (raw_data_length != expected_size_in_bytes)
-      throw Ort::Exception(MakeString("UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
-                                      expected_size_in_bytes, ", got ", raw_data_length),
-                           OrtErrorCode::ORT_FAIL);
-    if (IsLittleEndianOrder()) {
-      memcpy(p_data, raw_data, raw_data_length);
-    } else {
-      // Big-endian host: reverse the bytes of each element while copying.
-      const size_t type_size = sizeof(T);
-      const char* buff = reinterpret_cast<const char*>(raw_data);
-      for (size_t i = 0; i < raw_data_length; i += type_size, buff += type_size) {
-        T result;
-        char* temp_bytes = reinterpret_cast<char*>(&result);
-        for (size_t j = 0; j < type_size; ++j) {
-          memcpy(&temp_bytes[j], &buff[type_size - 1 - j], 1);
-        }
-        p_data[i / type_size] = result;
-      }
-    }
-  }
-}
-
-// This macro doesn't work for Float16/bool/string tensors
-#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                   \
-  template <>                                                                                   \
-  void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \
-                    /*out*/ T* p_data, int64_t expected_size) {                                 \
-    if (nullptr == p_data) {                                                                    \
-      const size_t size = raw_data != nullptr ?
raw_data_len : tensor.field_size(); \ - if (size == 0) return; \ - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ - } \ - if (nullptr == p_data || Type != tensor.data_type()) { \ - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); \ - } \ - if (raw_data != nullptr) { \ - UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); \ - return; \ - } \ - if (tensor.field_size() != expected_size) \ - throw Ort::Exception(MakeString("corrupted protobuf data: tensor shape size(", expected_size, \ - ") does not match the data size(", tensor.field_size(), ") in proto"), \ - OrtErrorCode::ORT_FAIL); \ - auto& data = tensor.field_name(); \ - for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ - *p_data++ = *reinterpret_cast(data_iter); \ - return; \ - } - -// TODO: complex64 complex128 -DEFINE_UNPACK_TENSOR(float, onnx::TensorProto_DataType_FLOAT, float_data, float_data_size) -DEFINE_UNPACK_TENSOR(double, onnx::TensorProto_DataType_DOUBLE, double_data, double_data_size); -DEFINE_UNPACK_TENSOR(uint8_t, onnx::TensorProto_DataType_UINT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int8_t, onnx::TensorProto_DataType_INT8, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int16_t, onnx::TensorProto_DataType_INT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(uint16_t, onnx::TensorProto_DataType_UINT16, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int32_t, onnx::TensorProto_DataType_INT32, int32_data, int32_data_size) -DEFINE_UNPACK_TENSOR(int64_t, onnx::TensorProto_DataType_INT64, int64_data, int64_data_size) -DEFINE_UNPACK_TENSOR(uint64_t, onnx::TensorProto_DataType_UINT64, uint64_data, uint64_data_size) -DEFINE_UNPACK_TENSOR(uint32_t, onnx::TensorProto_DataType_UINT32, uint64_data, uint64_data_size) - -// doesn't support raw data -template <> -void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, - /*out*/ std::string* p_data, int64_t expected_size) { - if (nullptr == p_data) { - if (tensor.string_data_size() == 0) return; - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - if (onnx::TensorProto_DataType_STRING != tensor.data_type()) { - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - - if (tensor.string_data_size() != expected_size) - throw Ort::Exception( - "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); - - auto& string_data = tensor.string_data(); - for (const auto& iter : string_data) { - *p_data++ = iter; - } - - return; -} -template <> -void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ bool* p_data, int64_t expected_size) { - if (nullptr == p_data) { - const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); - if (size == 0) return; - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - if (onnx::TensorProto_DataType_BOOL != tensor.data_type()) { - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - - if (raw_data != nullptr) { - return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); - } - - if (tensor.int32_data_size() != expected_size) - throw Ort::Exception( - "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); - for (int iter : tensor.int32_data()) { - *p_data++ = static_cast(iter); - } - - return; -} -template <> -void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ MLFloat16* p_data, int64_t expected_size) { - if (nullptr == p_data) { - const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size(); - if (size == 0) return; - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - if (onnx::TensorProto_DataType_FLOAT16 != tensor.data_type()) { - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - - if (raw_data != nullptr) { - return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); - } - - if (tensor.int32_data_size() != expected_size) - throw Ort::Exception( - "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); - - constexpr int max_value = std::numeric_limits::max(); - for (int i = 0; i < static_cast(expected_size); i++) { - int v = tensor.int32_data()[i]; - if (v < 0 || v > max_value) { - throw Ort::Exception( - "data overflow", OrtErrorCode::ORT_FAIL); - } - p_data[i] = MLFloat16(static_cast(v)); - } - - return; -} - -template <> -void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ BFloat16* p_data, int64_t expected_size) { - if (nullptr == p_data) { - const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); - if (size == 0) - return; - - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - if (onnx::TensorProto_DataType_BFLOAT16 != tensor.data_type()) { - throw Ort::Exception("", OrtErrorCode::ORT_INVALID_ARGUMENT); - } - - if (raw_data != nullptr) { - return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data); - } - - if (tensor.int32_data_size() != expected_size) - throw Ort::Exception( - "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); - - constexpr int max_value = std::numeric_limits::max(); - for (int i = 0; i < static_cast(expected_size); i++) { - int v = tensor.int32_data()[i]; - if (v < 0 || v > max_value) { - throw Ort::Exception( - "data overflow", OrtErrorCode::ORT_FAIL); - } - p_data[i] = BFloat16(static_cast(v)); - } - - return; -} - -#define CASE_PROTO_TRACE(X, Y) \ - case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ - if (!IAllocator::CalcMemSizeForArrayWithAlignment(size, sizeof(Y), out)) { \ - throw Ort::Exception("Invalid TensorProto", OrtErrorCode::ORT_FAIL); \ - } \ - break; - -template -Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out) { - const auto& dims = tensor_proto.dims(); - size_t size = 1; - for (google::protobuf::int64 dim : dims) { - if (dim < 0 || static_cast(dim) >= std::numeric_limits::max()) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); - } - if (!IAllocator::CalcMemSizeForArray(size, static_cast(dim), &size)) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); - } - } - switch (tensor_proto.data_type()) { - CASE_PROTO_TRACE(FLOAT, float); - CASE_PROTO_TRACE(DOUBLE, double); - CASE_PROTO_TRACE(BOOL, bool); - CASE_PROTO_TRACE(INT8, int8_t); - CASE_PROTO_TRACE(INT16, int16_t); - CASE_PROTO_TRACE(INT32, int32_t); - CASE_PROTO_TRACE(INT64, int64_t); - CASE_PROTO_TRACE(UINT8, uint8_t); - CASE_PROTO_TRACE(UINT16, uint16_t); - CASE_PROTO_TRACE(UINT32, uint32_t); - CASE_PROTO_TRACE(UINT64, uint64_t); - CASE_PROTO_TRACE(FLOAT16, MLFloat16); - CASE_PROTO_TRACE(BFLOAT16, BFloat16); - CASE_PROTO_TRACE(STRING, std::string); - default: - return Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED); - } - return Status::OK(); -} - -struct UnInitializeParam { - void* preallocated; - size_t preallocated_size; - ONNXTensorElementDataType ele_type; -}; - -void OrtInitializeBufferForTensor(void* input, size_t input_len, - ONNXTensorElementDataType type) { - try { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; - size_t tensor_size = input_len / sizeof(std::string); - std::string* ptr = reinterpret_cast(input); - for (size_t i = 0, n = tensor_size; i < n; ++i) { - new (ptr + i) std::string(); - } - } catch (std::exception& ex) { - throw Ort::Exception(ex.what(), OrtErrorCode::ORT_RUNTIME_EXCEPTION); - } - return; -} - -#define CASE_PROTO(X, Y) \ - case onnx::TensorProto_DataType::TensorProto_DataType_##X: \ - ::onnxruntime::perftest::UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, tensor_size); \ - break; - -#define CASE_TYPE(X) \ - case onnx::TensorProto_DataType_##X: \ - return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X; - -ONNXTensorElementDataType CApiElementTypeFromProtoType(int type) { - switch (type) { - CASE_TYPE(FLOAT) - CASE_TYPE(UINT8) - CASE_TYPE(INT8) - CASE_TYPE(UINT16) - CASE_TYPE(INT16) - CASE_TYPE(INT32) - CASE_TYPE(INT64) - CASE_TYPE(STRING) - CASE_TYPE(BOOL) - 
CASE_TYPE(FLOAT16) - CASE_TYPE(DOUBLE) - CASE_TYPE(UINT32) - CASE_TYPE(UINT64) - CASE_TYPE(COMPLEX64) - CASE_TYPE(COMPLEX128) - CASE_TYPE(BFLOAT16) - default: - return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; - } -} - -ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto) { - return CApiElementTypeFromProtoType(tensor_proto.data_type()); -} - -Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuffer& m, Ort::Value& value) { - const OrtAllocatorInfo& allocator = m.GetAllocInfo(); - ONNXTensorElementDataType ele_type = perftest::GetTensorElementType(tensor_proto); - const void* raw_data = nullptr; - size_t raw_data_len = 0; - void* tensor_data; - { - if (tensor_proto.data_location() == onnx::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Server doesn't support external data."); - } else if (tensor_proto.has_raw_data()) { - if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) - return Status(common::ONNXRUNTIME, common::FAIL, "String tensor cannot have raw data."); - raw_data = tensor_proto.raw_data().data(); - raw_data_len = tensor_proto.raw_data().size(); - } - { - void* preallocated = m.GetBuffer(); - size_t preallocated_size = m.GetLen(); - int64_t tensor_size = 1; - { - for (auto i : tensor_proto.dims()) { - if (i < 0) return Status(common::ONNXRUNTIME, common::FAIL, "Tensor can't contain negative dims"); - tensor_size *= i; - } - } - // tensor_size could be zero. see test_slice_start_out_of_bounds\test_data_set_0\output_0.pb - if (static_cast(tensor_size) > SIZE_MAX) { - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Size overflow"); - } - size_t size_to_allocate; - GetSizeInBytesFromTensorProto<0>(tensor_proto, &size_to_allocate); - - if (preallocated && preallocated_size < size_to_allocate) - return Status(common::ONNXRUNTIME, common::FAIL, MakeString("The buffer planner is not consistent with tensor buffer size, expected ", size_to_allocate, ", got ", preallocated_size)); - switch (tensor_proto.data_type()) { - CASE_PROTO(FLOAT, float); - CASE_PROTO(DOUBLE, double); - CASE_PROTO(BOOL, bool); - CASE_PROTO(INT8, int8_t); - CASE_PROTO(INT16, int16_t); - CASE_PROTO(INT32, int32_t); - CASE_PROTO(INT64, int64_t); - CASE_PROTO(UINT8, uint8_t); - CASE_PROTO(UINT16, uint16_t); - CASE_PROTO(UINT32, uint32_t); - CASE_PROTO(UINT64, uint64_t); - CASE_PROTO(FLOAT16, MLFloat16); - CASE_PROTO(BFLOAT16, BFloat16); - case onnx::TensorProto_DataType::TensorProto_DataType_STRING: - if (preallocated != nullptr) { - OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); - } - ::onnxruntime::perftest::UnpackTensor(tensor_proto, raw_data, raw_data_len, - (std::string*)preallocated, tensor_size); - break; - default: { - std::ostringstream ostr; - ostr << "Initialized tensor with unexpected type: " << tensor_proto.data_type(); - return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str()); - } - } - tensor_data = preallocated; - } - } - std::vector tensor_shape_vec = GetTensorShapeFromTensorProto(tensor_proto); - // Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1). 
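-  // (ONNX serializes a rank-0 tensor with an empty dims list, so passing a
-  //  zero-length shape to CreateTensor below still describes a buffer that
-  //  holds exactly one element.)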
- value = Ort::Value::CreateTensor(&allocator, tensor_data, m.GetLen(), tensor_shape_vec.data(), tensor_shape_vec.size(), (ONNXTensorElementDataType)tensor_proto.data_type()); - return Status::OK(); -} -template Status GetSizeInBytesFromTensorProto<256>(const onnx::TensorProto& tensor_proto, - size_t* out); -template Status GetSizeInBytesFromTensorProto<0>(const onnx::TensorProto& tensor_proto, size_t* out); -} // namespace perftest -} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/perftest/tensorprotoutils.h b/onnxruntime/test/perftest/tensorprotoutils.h deleted file mode 100644 index e4c18edaab3b0..0000000000000 --- a/onnxruntime/test/perftest/tensorprotoutils.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#pragma once - -#include -#include -#include "core/session/onnxruntime_c_api.h" -#include "core/session/onnxruntime_cxx_api.h" - -#include "mem_buffer.h" - -namespace onnx { -class TensorProto; -} - -namespace onnxruntime { -namespace perftest { -// How much memory it will need for putting the content of this tensor into a plain array -// complex64/complex128 tensors are not supported. -// The output value could be zero or -1. -template -common::Status GetSizeInBytesFromTensorProto(const onnx::TensorProto& tensor_proto, size_t* out); -/** - * deserialize a TensorProto into a preallocated memory buffer. - * Impl must correspond to onnxruntime/core/framework/tensorprotoutils.cc - * This implementation does not support external data so as to reduce dependency surface. - */ -common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuffer& m, /* out */ Ort::Value& value); - -template -void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, - /*out*/ T* p_data, int64_t expected_size); - -ONNXTensorElementDataType CApiElementTypeFromProtoType(int type); -ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto); -} // namespace perftest -} // namespace onnxruntime \ No newline at end of file From 9e8321dec69866048fb83b2448b7019567c8adf7 Mon Sep 17 00:00:00 2001 From: Pranav Sharma Date: Sat, 10 Aug 2019 21:17:59 -0700 Subject: [PATCH 5/5] Fix mem leak --- cmake/onnxruntime_unittests.cmake | 3 + .../core/session/onnxruntime_c_api.h | 1 - onnxruntime/test/onnx/TestCase.cc | 29 +++++-- onnxruntime/test/onnx/TestCase.h | 2 +- onnxruntime/test/onnx/callback.cc | 16 ++++ onnxruntime/test/onnx/callback.h | 17 ++++ onnxruntime/test/onnx/heap_buffer.cc | 14 +++- onnxruntime/test/onnx/heap_buffer.h | 9 +- onnxruntime/test/onnx/runner.cc | 82 +++++++++---------- onnxruntime/test/onnx/tensorprotoutils.cc | 45 ++++++++-- onnxruntime/test/onnx/tensorprotoutils.h | 5 +- .../test/perftest/performance_runner.h | 2 +- 12 files changed, 161 insertions(+), 64 deletions(-) create mode 100644 onnxruntime/test/onnx/callback.cc create mode 100644 onnxruntime/test/onnx/callback.h diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 0b3fad15d28ba..368ee8790d718 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -472,6 +472,9 @@ set(onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/onnxruntime_event.h ${onnx_test_runner_src_dir}/sync_api.h ${onnx_test_runner_src_dir}/sync_api.cc + ${onnx_test_runner_src_dir}/callback.h + ${onnx_test_runner_src_dir}/callback.cc + ${onnx_test_runner_src_dir}/mem_buffer.h 
${onnx_test_runner_src_dir}/tensorprotoutils.h ${onnx_test_runner_src_dir}/tensorprotoutils.cc) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index d67efd5204f2f..fad81a5359610 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -150,7 +150,6 @@ ORT_RUNTIME_CLASS(RunOptions); ORT_RUNTIME_CLASS(TypeInfo); ORT_RUNTIME_CLASS(TensorTypeAndShapeInfo); ORT_RUNTIME_CLASS(SessionOptions); -ORT_RUNTIME_CLASS(Callback); ORT_RUNTIME_CLASS(CustomOpDomain); ORT_RUNTIME_CLASS(Allocator); diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 1fdb2cd9668c2..fe43b296b6046 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -266,7 +266,7 @@ static void SortTensorFileNames(std::vector>& } } -OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, HeapBuffer& b) { +OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, onnxruntime::test::HeapBuffer& b) { size_t len = 0; auto status = onnxruntime::test::GetSizeInBytesFromTensorProto<0>(t, &len); if (!status.IsOK()) { @@ -274,17 +274,22 @@ OrtValue* TensorToOrtValue(const ONNX_NAMESPACE::TensorProto& t, HeapBuffer& b) } void* p = len == 0 ? nullptr : b.AllocMemory(len); Ort::Value temp_value{nullptr}; + auto d = std::make_unique(); OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); status = onnxruntime::test::TensorProtoToMLValue(t, onnxruntime::test::MemBuffer(p, len, cpu_allocator_info), - temp_value); + temp_value, *d); if (!status.IsOK()) { ORT_THROW(status.ToString()); } + if (d->f) { + b.AddDeleter(d.release()); + } return temp_value.release(); } void LoopDataFile(int test_data_pb_fd, bool is_input, const TestModelInfo* modelinfo, - std::unordered_map& name_data_map, HeapBuffer& b, std::ostringstream& oss) { + std::unordered_map& name_data_map, onnxruntime::test::HeapBuffer& b, + std::ostringstream& oss) { google::protobuf::io::FileInputStream f(test_data_pb_fd); f.SetCloseOnDelete(true); google::protobuf::io::CodedInputStream coded_input(&f); @@ -392,7 +397,8 @@ class OnnxTestCase : public ITestCase { return std::string(); } - void ConvertTestData(const std::vector& test_data_pbs, HeapBuffer& b, bool is_input, + void ConvertTestData(const std::vector& test_data_pbs, onnxruntime::test::HeapBuffer& b, + bool is_input, std::unordered_map& out); std::once_flag model_parsed_; @@ -427,7 +433,8 @@ class OnnxTestCase : public ITestCase { std::string GetTestCaseVersion() const override { return model_info_->GetModelVersion(); } - void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map&, bool is_input) override; + void LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::unordered_map&, + bool is_input) override; }; ITestCase* CreateOnnxTestCase(const std::string& test_case_name, TestModelInfo* model, @@ -500,7 +507,8 @@ static void LoadTensors(const std::vector& pb_files, } } -void OnnxTestCase::LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& name_data_map, +void OnnxTestCase::LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, + std::unordered_map& name_data_map, bool is_input) { if (id >= test_data_dirs_.size()) { ORT_THROW("index out of bound"); @@ -553,7 +561,8 @@ void OnnxTestCase::LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& test_data_pbs, HeapBuffer& b, +void OnnxTestCase::ConvertTestData(const std::vector& 
test_data_pbs, + onnxruntime::test::HeapBuffer& b, bool is_input, std::unordered_map& out) { bool has_valid_names = true; std::vector var_names(test_data_pbs.size()); @@ -585,12 +594,16 @@ void OnnxTestCase::ConvertTestData(const std::vector(); OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); status = onnxruntime::test::TensorProtoToMLValue(input, onnxruntime::test::MemBuffer(p, len, cpu_allocator_info), - v1); + v1, *d); if (!status.IsOK()) { ORT_THROW(status.ToString()); } + if (d->f) { + b.AddDeleter(d.release()); + } out.insert(std::make_pair(name, v1.release())); } } diff --git a/onnxruntime/test/onnx/TestCase.h b/onnxruntime/test/onnx/TestCase.h index 119f1673125ad..66663a0450d8a 100644 --- a/onnxruntime/test/onnx/TestCase.h +++ b/onnxruntime/test/onnx/TestCase.h @@ -19,7 +19,7 @@ class ValueInfoProto; //One test case can contain multiple test data(input/output pairs) class ITestCase { public: - virtual void LoadTestData(size_t id, HeapBuffer& b, std::unordered_map& name_data_map, + virtual void LoadTestData(size_t id, onnxruntime::test::HeapBuffer& b, std::unordered_map& name_data_map, bool is_input) = 0; virtual const PATH_CHAR_TYPE* GetModelUrl() const = 0; virtual const std::string& GetNodeName() const = 0; diff --git a/onnxruntime/test/onnx/callback.cc b/onnxruntime/test/onnx/callback.cc new file mode 100644 index 0000000000000..99b3b7a6bd303 --- /dev/null +++ b/onnxruntime/test/onnx/callback.cc @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "callback.h" + +namespace onnxruntime { +namespace test { +void OrtRunCallback(OrtCallback* f) noexcept { + if (f == nullptr) return; + if (f->f != nullptr) { + f->f(f->param); + delete f; + } +} +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/onnx/callback.h b/onnxruntime/test/onnx/callback.h new file mode 100644 index 0000000000000..c548b57486b21 --- /dev/null +++ b/onnxruntime/test/onnx/callback.h @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
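+//
+// OrtCallback pairs a cleanup function with the context it needs, so buffers
+// whose teardown requires more than free() (e.g. string tensors whose
+// std::string elements must be destructed) can be released later by the
+// holder that registered the callback.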
+#pragma once
+
+namespace onnxruntime {
+namespace test {
+struct OrtCallback {
+  void (*f)(void* param) noexcept;
+  void* param;
+};
+
+/**
+ * f will be freed in this call
+ */
+void OrtRunCallback(OrtCallback* f) noexcept;
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/onnx/heap_buffer.cc b/onnxruntime/test/onnx/heap_buffer.cc
index e2b7d8dac826f..aca75de061268 100644
--- a/onnxruntime/test/onnx/heap_buffer.cc
+++ b/onnxruntime/test/onnx/heap_buffer.cc
@@ -3,9 +3,21 @@
 #include "heap_buffer.h"
 #include "core/session/onnxruntime_c_api.h"
+#include "callback.h"
+
+namespace onnxruntime {
+namespace test {
+void HeapBuffer::AddDeleter(OrtCallback* d) {
+  if (d != nullptr) deleters_.push_back(d);
+}
 HeapBuffer::~HeapBuffer() {
+  for (auto d : deleters_) {
+    OrtRunCallback(d);
+  }
   for (void* p : buffers_) {
     free(p);
   }
-}
\ No newline at end of file
+}
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/onnx/heap_buffer.h b/onnxruntime/test/onnx/heap_buffer.h
index f888818ef1f4c..b4abf131b1b21 100644
--- a/onnxruntime/test/onnx/heap_buffer.h
+++ b/onnxruntime/test/onnx/heap_buffer.h
@@ -5,6 +5,9 @@
 #include <vector>
 #include <stdlib.h>
+namespace onnxruntime {
+namespace test {
+struct OrtCallback;
 /**
  * A holder for delay freed buffers
  */
@@ -20,7 +23,11 @@ class HeapBuffer {
     buffers_.push_back(p);
     return p;
   }
+  void AddDeleter(OrtCallback* d);
 
  private:
+  std::vector<OrtCallback*> deleters_;
   std::vector<void*> buffers_;
-};
\ No newline at end of file
+};
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/onnx/runner.cc b/onnxruntime/test/onnx/runner.cc
index 575292fbc6dd3..7827b4ff8a95a 100644
--- a/onnxruntime/test/onnx/runner.cc
+++ b/onnxruntime/test/onnx/runner.cc
@@ -27,45 +27,44 @@ using namespace onnxruntime;
 using ::onnxruntime::common::Status;
 
-// Permanently exclude following tests because ORT support only opset staring from 7, 
+// Permanently exclude the following tests because ORT supports only opsets starting from 7.
 // Please make no more changes to the list
-const std::set<std::string> immutable_broken_tests =
-{
-    "AvgPool1d",
-    "AvgPool1d_stride",
-    "AvgPool2d",
-    "AvgPool2d_stride",
-    "AvgPool3d",
-    "AvgPool3d_stride",
-    "AvgPool3d_stride1_pad0_gpu_input",
-    "BatchNorm1d_3d_input_eval",
-    "BatchNorm2d_eval",
-    "BatchNorm2d_momentum_eval",
-    "BatchNorm3d_eval",
-    "BatchNorm3d_momentum_eval",
-    "GLU",
-    "GLU_dim",
-    "Linear",
-    "PReLU_1d",
-    "PReLU_1d_multiparam",
-    "PReLU_2d",
-    "PReLU_2d_multiparam",
-    "PReLU_3d",
-    "PReLU_3d_multiparam",
-    "PoissonNLLLLoss_no_reduce",
-    "Softsign",
-    "operator_add_broadcast",
-    "operator_add_size1_broadcast",
-    "operator_add_size1_right_broadcast",
-    "operator_add_size1_singleton_broadcast",
-    "operator_addconstant",
-    "operator_addmm",
-    "operator_basic",
-    "operator_mm",
-    "operator_non_float_params",
-    "operator_params",
-    "operator_pow"
-};
+const std::set<std::string> immutable_broken_tests =
+    {
+        "AvgPool1d",
+        "AvgPool1d_stride",
+        "AvgPool2d",
+        "AvgPool2d_stride",
+        "AvgPool3d",
+        "AvgPool3d_stride",
+        "AvgPool3d_stride1_pad0_gpu_input",
+        "BatchNorm1d_3d_input_eval",
+        "BatchNorm2d_eval",
+        "BatchNorm2d_momentum_eval",
+        "BatchNorm3d_eval",
+        "BatchNorm3d_momentum_eval",
+        "GLU",
+        "GLU_dim",
+        "Linear",
+        "PReLU_1d",
+        "PReLU_1d_multiparam",
+        "PReLU_2d",
+        "PReLU_2d_multiparam",
+        "PReLU_3d",
+        "PReLU_3d_multiparam",
+        "PoissonNLLLLoss_no_reduce",
+        "Softsign",
+        "operator_add_broadcast",
+        "operator_add_size1_broadcast",
+        "operator_add_size1_right_broadcast",
+        "operator_add_size1_singleton_broadcast",
+
"operator_addconstant", + "operator_addmm", + "operator_basic", + "operator_mm", + "operator_non_float_params", + "operator_params", + "operator_pow"}; void ORT_CALLBACK RunTestCase(ORT_CALLBACK_INSTANCE pci, void* context, ORT_WORK work) { OnnxRuntimeCloseThreadpoolWork(work); @@ -232,13 +231,13 @@ Status RunTests(TestEnv& env, int p_models, int concurrent_runs, size_t repeat_c } for (size_t i = 0; i != env.tests.size(); ++i) { if (!results[i]) { - stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), env.tests[i]->GetTestCaseVersion())); + stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), env.tests[i]->GetTestCaseVersion())); continue; } const TestCaseResult& r = *results[i]; for (const EXECUTE_RESULT res : r.GetExcutionResult()) { if (res != EXECUTE_RESULT::SUCCESS && res != EXECUTE_RESULT::NOT_SUPPORT) { - stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(),env.tests[i]->GetTestCaseVersion())); + stat.AddFailedTest(std::pair(env.tests[i]->GetTestCaseName(), env.tests[i]->GetTestCaseVersion())); } switch (res) { case EXECUTE_RESULT::SUCCESS: @@ -347,7 +346,7 @@ void DataRunner::RunTask(size_t task_id, ORT_CALLBACK_INSTANCE pci, bool store_r } EXECUTE_RESULT DataRunner::RunTaskImpl(size_t task_id) { - HeapBuffer holder; + onnxruntime::test::HeapBuffer holder; std::unordered_map feeds; c_->LoadTestData(task_id, holder, feeds, true); @@ -499,7 +498,6 @@ void SeqTestRunner::Start(ORT_CALLBACK_INSTANCE pci, size_t) { } void RunSingleTestCase(ITestCase* info, Ort::Env& env, const Ort::SessionOptions& sf, size_t concurrent_runs, size_t repeat_count, PThreadPool tpool, ORT_CALLBACK_INSTANCE pci, TestCaseCallBack on_finished) { - //for test in immutable list, do not even run it if (immutable_broken_tests.find(info->GetTestCaseName()) != immutable_broken_tests.end()) { on_finished(std::make_shared(0, EXECUTE_RESULT::NOT_SUPPORT, info->GetNodeName()), pci); diff --git a/onnxruntime/test/onnx/tensorprotoutils.cc b/onnxruntime/test/onnx/tensorprotoutils.cc index ccff84e7841c3..27442273f26fa 100644 --- a/onnxruntime/test/onnx/tensorprotoutils.cc +++ b/onnxruntime/test/onnx/tensorprotoutils.cc @@ -11,6 +11,12 @@ #include "core/framework/allocator.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/graph/onnx_protobuf.h" +#include "callback.h" + +struct OrtStatus { + OrtErrorCode code; + char msg[1]; // a null-terminated string +}; namespace onnxruntime { namespace test { @@ -301,19 +307,37 @@ struct UnInitializeParam { ONNXTensorElementDataType ele_type; }; -void OrtInitializeBufferForTensor(void* input, size_t input_len, - ONNXTensorElementDataType type) { +OrtStatus* OrtInitializeBufferForTensor(void* input, size_t input_len, + ONNXTensorElementDataType type) { try { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return nullptr; size_t tensor_size = input_len / sizeof(std::string); std::string* ptr = reinterpret_cast(input); for (size_t i = 0, n = tensor_size; i < n; ++i) { new (ptr + i) std::string(); } } catch (std::exception& ex) { - throw Ort::Exception(ex.what(), OrtErrorCode::ORT_RUNTIME_EXCEPTION); + return OrtCreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); + } + return nullptr; +} + +ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type); + +static void UnInitTensor(void* param) noexcept { + UnInitializeParam* p = reinterpret_cast(param); + OrtUninitializeBuffer(p->preallocated, 
p->preallocated_size, p->ele_type); + delete p; +} + +ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type) { + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; + size_t tensor_size = input_len / sizeof(std::string); + std::string* ptr = reinterpret_cast(input); + using std::string; + for (size_t i = 0, n = tensor_size; i < n; ++i) { + ptr[i].~string(); } - return; } #define CASE_PROTO(X, Y) \ @@ -352,7 +376,8 @@ ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_p return CApiElementTypeFromProtoType(tensor_proto.data_type()); } -Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuffer& m, Ort::Value& value) { +Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuffer& m, Ort::Value& value, + OrtCallback& deleter) { const OrtAllocatorInfo& allocator = m.GetAllocInfo(); ONNXTensorElementDataType ele_type = test::GetTensorElementType(tensor_proto); const void* raw_data = nullptr; @@ -402,7 +427,13 @@ Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuff CASE_PROTO(BFLOAT16, BFloat16); case onnx::TensorProto_DataType::TensorProto_DataType_STRING: if (preallocated != nullptr) { - OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); + OrtStatus* status = OrtInitializeBufferForTensor(preallocated, preallocated_size, ele_type); + if (status != nullptr) { + OrtReleaseStatus(status); + return Status(common::ONNXRUNTIME, common::FAIL, "initialize preallocated buffer failed"); + } + deleter.f = UnInitTensor; + deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type}; } ::onnxruntime::test::UnpackTensor(tensor_proto, raw_data, raw_data_len, (std::string*)preallocated, tensor_size); diff --git a/onnxruntime/test/onnx/tensorprotoutils.h b/onnxruntime/test/onnx/tensorprotoutils.h index c44d2e2132992..ab3bb7dc821c2 100644 --- a/onnxruntime/test/onnx/tensorprotoutils.h +++ b/onnxruntime/test/onnx/tensorprotoutils.h @@ -7,7 +7,7 @@ #include #include "core/session/onnxruntime_c_api.h" #include "core/session/onnxruntime_cxx_api.h" - +#include "callback.h" #include "mem_buffer.h" namespace onnx { @@ -26,7 +26,8 @@ common::Status GetSizeInBytesFromTensorProto(const onnx::TensorProto& tensor_pro * Impl must correspond to onnxruntime/core/framework/tensorprotoutils.cc * This implementation does not support external data so as to reduce dependency surface. */ -common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuffer& m, /* out */ Ort::Value& value); +common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuffer& m, /* out */ Ort::Value& value, + OrtCallback& deleter); template void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len, diff --git a/onnxruntime/test/perftest/performance_runner.h b/onnxruntime/test/perftest/performance_runner.h index d4abaceeea82d..8d9cf1d808155 100644 --- a/onnxruntime/test/perftest/performance_runner.h +++ b/onnxruntime/test/perftest/performance_runner.h @@ -128,7 +128,7 @@ class PerformanceRunner { PerformanceTestConfig performance_test_config_; TestModelInfo* test_model_info_; std::unique_ptr session_; - HeapBuffer b_; + onnxruntime::test::HeapBuffer b_; std::unique_ptr test_case_; // TODO: Convert to OrtMutex
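
Taken together, this last patch changes the ownership contract: TensorProtoToMLValue now reports any non-trivial cleanup through an OrtCallback, and the caller parks that callback on the HeapBuffer that owns the backing allocation. A minimal sketch of the resulting call pattern, assembled from the TestCase.cc hunks above (it assumes the test-tree helpers from this patch are on the include path; error handling is abbreviated):

```cpp
#include <memory>

#include "core/common/common.h"        // ORT_THROW
#include "core/framework/allocator.h"  // OrtAllocatorInfo, OrtDevice
#include "callback.h"                  // onnxruntime::test::OrtCallback
#include "heap_buffer.h"               // onnxruntime::test::HeapBuffer
#include "mem_buffer.h"                // onnxruntime::test::MemBuffer
#include "tensorprotoutils.h"          // TensorProtoToMLValue, GetSizeInBytesFromTensorProto

// Deserialize one TensorProto into an OrtValue whose raw storage -- and any
// deferred cleanup callback -- is owned by the given HeapBuffer, mirroring
// TensorToOrtValue in the TestCase.cc hunk above.
OrtValue* LoadTensorProto(const ONNX_NAMESPACE::TensorProto& t, onnxruntime::test::HeapBuffer& b) {
  size_t len = 0;
  auto status = onnxruntime::test::GetSizeInBytesFromTensorProto<0>(t, &len);
  if (!status.IsOK()) ORT_THROW(status.ToString());

  void* p = len == 0 ? nullptr : b.AllocMemory(len);
  OrtAllocatorInfo cpu_allocator_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault);
  Ort::Value v{nullptr};
  auto d = std::make_unique<onnxruntime::test::OrtCallback>();  // value-initialized: d->f == nullptr
  status = onnxruntime::test::TensorProtoToMLValue(
      t, onnxruntime::test::MemBuffer(p, len, cpu_allocator_info), v, *d);
  if (!status.IsOK()) ORT_THROW(status.ToString());

  if (d->f) b.AddDeleter(d.release());  // ~HeapBuffer runs the callback, then frees p
  return v.release();
}
```
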