From ac4309a1ed95e869cf15321ba2f78e856c1df27e Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 15:43:45 -0700
Subject: [PATCH 01/23] Model serialization

---
 .../Microsoft.ML.OnnxRuntime.csproj           | 24 ++++++++++
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs |  3 ++
 .../SessionOptions.cs                         |  9 ++++
 .../InferenceTest.cs                          | 13 ++++-
 .../core/session/onnxruntime_c_api.h          |  3 ++
 .../core/session/onnxruntime_cxx_api.h        |  2 +
 .../core/session/onnxruntime_cxx_inline.h     |  5 ++
 onnxruntime/core/providers/cpu/symbols.txt    |  2 +
 .../core/session/abi_session_options.cc       |  6 +++
 onnxruntime/core/session/inference_session.cc |  5 ++
 onnxruntime/core/session/inference_session.h  |  3 ++
 .../test/framework/inference_session_test.cc  | 47 +++++++++++++++++++
 onnxruntime/test/perftest/ort_test_session.cc |  2 +
 13 files changed, 123 insertions(+), 1 deletion(-)
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index bffa46e623c56..0bc9a0ed2dffc 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -46,6 +46,30 @@
           CopyToOutputDirectory="Never"
           Visible="false"
     />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
     <None Include="$(NativeBuildOutputDir)\onnxruntime.lib"
           PackagePath="\runtimes\win-$(TargetArchitecture)\native"
           Pack="true"
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index 7fd2c33c6a6cb..e0bf31d76fd23 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -130,6 +130,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         [DllImport(nativeLib, CharSet = charSet)]
         public static extern IntPtr /*(OrtStatus*)*/ OrtDisableSequentialExecution(IntPtr /*(OrtSessionOptions*)*/ options);
 
+        [DllImport(nativeLib, CharSet = charSet)]
+        public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, string optimizedModelFilepath);
+
         [DllImport(nativeLib, CharSet = charSet)]
         public static extern IntPtr /*(OrtStatus*)*/ OrtEnableProfiling(IntPtr /* OrtSessionOptions* */ options, string profilePathPrefix);
 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
index 4ce708687ef79..13a334e0fd865 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -37,6 +37,15 @@ public void SetSessionGraphOptimizationLevel(uint optimization_level)
             NativeApiStatus.VerifySuccess(NativeMethods.OrtSetSessionGraphOptimizationLevel(_nativePtr, optimization_level));
         }
 
+        /// <summary>
+        ///  Set filepath to save optimized model after graph level transformations.
+        /// </summary>
+        /// <param name="optimizedModelFilepath">File path for saving optimized model.</param>
+        public void SetOptimizedModelFilePath(string optimizedModelFilepath)
+        {
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtSetOptimizedModelFilePath(_nativePtr, optimizedModelFilepath));
+        }
+
         /// <summary>
         /// Enable Sequential Execution. By default, it is enabled.
         /// </summary>
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 31c0ee67433fa..b406d21106b6c 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -656,6 +656,17 @@ private void TestModelSequenceOfMapStringFloat()
             }
         }
 
+        [Fact]
+        private void TestModelSerialization()
+        {
+            string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
+
+            // Set the graph optimization level for this session.
+            SessionOptions options = new SessionOptions();
+            options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx");
+            var session = new InferenceSession(modelPath, options);
+        }
+
         [GpuFact]
         private void TestGpu()
         {
@@ -696,7 +707,7 @@ private void VerifyNativeMethodsExist()
             "OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling",
             "OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena",
             "OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel",
-            "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
+            "OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
             "OrtCreateDefaultAllocator","OrtAllocatorFree","OrtAllocatorGetInfo",
             "OrtCreateTensorWithDataAsOrtValue","OrtGetTensorMutableData", "OrtReleaseAllocatorInfo",
             "OrtCastTypeInfoToTensorInfo","OrtGetTensorTypeAndShape","OrtGetTensorElementType","OrtGetDimensionsCount",
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 03bf3a4467df3..8c34399334f85 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -201,6 +201,9 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess,
  */
 ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output);
 
+// Set filepath to save optimized model after graph level transformations.
+ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath);
+
 // create a copy of an existing OrtSessionOptions
 ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output);
 ORT_API_STATUS(OrtEnableSequentialExecution, _In_ OrtSessionOptions* options);
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index df15d2d2ecde6..7956b5146e48e 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -139,6 +139,8 @@ struct SessionOptions : Base<OrtSessionOptions> {
   SessionOptions& EnableCpuMemArena();
   SessionOptions& DisableCpuMemArena();
 
+  SessionOptions& SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_file);
+
   SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix);
   SessionOptions& DisableProfiling();
 
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 970155aeaa383..39b7856084b25 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -137,6 +137,11 @@ inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(uint32_t graph_
   return *this;
 }
 
+inline SessionOptions& SessionOptions::SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath) {
+  ORT_THROW_ON_ERROR(OrtSetOptimizedModelFilePath(p_, optimized_model_filepath));
+  return *this;
+}
+
 inline SessionOptions& SessionOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) {
   ORT_THROW_ON_ERROR(OrtEnableProfiling(p_, profile_file_prefix));
   return *this;
diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt
index 265b10260b342..658ffcf928d93 100644
--- a/onnxruntime/core/providers/cpu/symbols.txt
+++ b/onnxruntime/core/providers/cpu/symbols.txt
@@ -80,6 +80,8 @@ OrtSetDimensions
 OrtSetSessionGraphOptimizationLevel
 OrtSetSessionLogId
 OrtSetSessionLogVerbosityLevel
+OrtSetSessionGraphOptimizationLevel
+OrtSetOptimizedModelFilePath
 OrtSetSessionThreadPoolSize
 OrtSetTensorElementType
 OrtTensorProtoToOrtValue
diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc
index aeaab0b2488da..8027e99ec21fd 100644
--- a/onnxruntime/core/session/abi_session_options.cc
+++ b/onnxruntime/core/session/abi_session_options.cc
@@ -44,6 +44,12 @@ ORT_API_STATUS_IMPL(OrtDisableSequentialExecution, _In_ OrtSessionOptions* optio
   return nullptr;
 }
 
+// set filepath to save optimized onnx model.
+ORT_API_STATUS_IMPL(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath) {
+  options->value.optimized_model_filepath = optimized_model_filepath;
+  return nullptr;
+}
+
 // enable profiling for this session.
 ORT_API_STATUS_IMPL(OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix) {
   options->value.enable_profiling = true;
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index b421855f4e8aa..2b7fad42ba8c1 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -510,6 +510,11 @@ common::Status InferenceSession::Initialize() {
     // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs.
     ORT_RETURN_IF_ERROR(graph.Resolve());
 
+    if (!session_options_.optimized_model_filepath.empty()) {
+      // Serialize the optimized ONNX model; surface a failed save to the caller instead of dropping it.
+      ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
+    }
+
     ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution));
     ORT_RETURN_IF_ERROR(session_initializer.InitializeAndSave(nullptr));
 
diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h
index d1a7a57f046df..ae0ee2ae56185 100644
--- a/onnxruntime/core/session/inference_session.h
+++ b/onnxruntime/core/session/inference_session.h
@@ -56,6 +56,9 @@ struct SessionOptions {
   // enable profiling for this session.
   bool enable_profiling = false;
 
+  // non empty filepath enables serialization of the transformed optimized model to the specified filepath.
+  std::basic_string<ORTCHAR_T> optimized_model_filepath;
+
   // enable the memory pattern optimization.
   // The idea is if the input shapes are the same, we could trace the internal memory allocation
   // and generate a memory pattern for future request. So next time we could just do one allocation
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 66e5f9c0bfed0..c7879432c6593 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -332,6 +332,53 @@ TEST(InferenceSessionTests, DisableCPUArena) {
   RunModel(session_object, run_options);
 }
 
+TEST(InferenceSessionTests, TestModelSerialization) {
+  // Load model with level1 tranform level and serialize model the after transformation.
+  SessionOptions so;
+  const string test_model = "testdata/transform/abs-id-max.onnx";
+  so.session_logid = "InferenceSessionTests.TestModelSerialization";
+  so.enable_cpu_mem_arena = false;
+  so.graph_optimization_level = TransformerLevel::Level1;
+  so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so.graph_optimization_level));
+  InferenceSession session_object{so, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object.Initialize().IsOK());
+
+  // Serialize the model to the same file path again to make sure that overwriting doesn't fail.
+  InferenceSession overwrite_session_object{so, &DefaultLoggingManager()};
+  ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK());
+  ASSERT_TRUE(overwrite_session_object.Initialize().IsOK());
+
+  // Load the serialized model with the default transform level and serialize it again.
+  SessionOptions so_opt;
+  so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
+  so_opt.enable_cpu_mem_arena = false;
+  so_opt.graph_optimization_level = TransformerLevel::Default;
+  so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so_opt.graph_optimization_level));
+  InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK());
+  ASSERT_TRUE(session_object_opt.Initialize().IsOK());
+
+  // Assert that re-feeding the optimized model at the default transform level serializes to a
+  // byte-identical file. Streams are opened at end-of-file (ios::ate) so tellg() yields the file size.
+  std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary | ios::ate);
+  ASSERT_TRUE(model_fs_session1.good());
+  std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary | ios::ate);
+  ASSERT_TRUE(model_fs_session2.good());
+  ASSERT_TRUE(model_fs_session1.tellg() == model_fs_session2.tellg());
+  model_fs_session1.seekg(0, std::ifstream::beg);
+  model_fs_session2.seekg(0, std::ifstream::beg);
+  ASSERT_TRUE(std::equal(std::istreambuf_iterator<char>(model_fs_session1.rdbuf()),
+                         std::istreambuf_iterator<char>(),
+                         std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));
+
+  // A session with an empty optimized model filepath must still load and initialize.
+  so_opt.optimized_model_filepath = L"";
+  InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK());
+}
+
 #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS
 static bool Compare(const InputDefList& f_arg, const InputDefList& s_arg) {
   if (f_arg.size() != s_arg.size()) {
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 906ba5693ea70..a8a50dc72cea6 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -90,6 +90,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size);
   // Set optimization level.
   session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level);
+  session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx");
+
   if (!performance_test_config.run_config.profile_file.empty())
     session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str());
   session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options);

From 48ae9eac74d43529a0986a4541ebc9601e9e1672 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 20:48:42 -0700
Subject: [PATCH 02/23] Removed duplicate symbol

---
 onnxruntime/core/providers/cpu/symbols.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt
index 658ffcf928d93..c02dade6da2a0 100644
--- a/onnxruntime/core/providers/cpu/symbols.txt
+++ b/onnxruntime/core/providers/cpu/symbols.txt
@@ -80,7 +80,6 @@ OrtSetDimensions
 OrtSetSessionGraphOptimizationLevel
 OrtSetSessionLogId
 OrtSetSessionLogVerbosityLevel
-OrtSetSessionGraphOptimizationLevel
 OrtSetOptimizedModelFilePath
 OrtSetSessionThreadPoolSize
 OrtSetTensorElementType

From b4d09aa87db33d8e54d046f520facd621c41108a Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 22:16:08 -0700
Subject: [PATCH 03/23] Minor update

---
 csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index b406d21106b6c..53d587c58adbd 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -661,9 +661,10 @@ private void TestModelSerialization()
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
 
-            // Set the graph optimization level for this session.
+            // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx");
+            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx"));
+            options.SetSessionGraphOptimizationLevel(1);
             var session = new InferenceSession(modelPath, options);
         }
 

From 0d0b6fc9b8c7491d8c3c682e692271c73d28de9e Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Fri, 5 Jul 2019 17:00:50 -0700
Subject: [PATCH 04/23] Review comments

---
 csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 3 ++-
 include/onnxruntime/core/session/onnxruntime_c_api.h        | 2 +-
 onnxruntime/test/framework/inference_session_test.cc        | 3 ++-
 onnxruntime/test/perftest/ort_test_session.cc               | 2 --
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 53d587c58adbd..0af28907d00d8 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -663,9 +663,10 @@ private void TestModelSerialization()
 
             // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx"));
+            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform-Level1.onnx"));
             options.SetSessionGraphOptimizationLevel(1);
             var session = new InferenceSession(modelPath, options);
+            Assert.NotNull(session);
         }
 
         [GpuFact]
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 8c34399334f85..1211109875757 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -202,7 +202,7 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess,
 ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output);
 
 // Set filepath to save optimized model after graph level transformations.
-ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath);
+ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath);
 
 // create a copy of an existing OrtSessionOptions
 ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output);
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index c7879432c6593..61c17e48b4766 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -333,7 +333,8 @@ TEST(InferenceSessionTests, DisableCPUArena) {
 }
 
 TEST(InferenceSessionTests, TestModelSerialization) {
-  // Load model with level1 tranform level and serialize model the after transformation.
+  // Load model with level1 transform level as session options 
+  // and serialize the model after transformation.
   SessionOptions so;
   const string test_model = "testdata/transform/abs-id-max.onnx";
   so.session_logid = "InferenceSessionTests.TestModelSerialization";
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index a8a50dc72cea6..906ba5693ea70 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -90,8 +90,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size);
   // Set optimization level.
   session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level);
-  session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx");
-
   if (!performance_test_config.run_config.profile_file.empty())
     session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str());
   session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options);

From 523b0f08ea2ee5b5f671b4bc572c7a29a1be66f5 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 22 Jul 2019 06:03:43 -0700
Subject: [PATCH 05/23] add tests

---
 .../Microsoft.ML.OnnxRuntime.csproj           | 24 -------------------
 .../InferenceTest.cs                          |  7 +++---
 .../python/onnxruntime_pybind_state.cc        |  2 ++
 .../test/python/onnxruntime_test_python.py    |  9 ++++++-
 4 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index 0bc9a0ed2dffc..bffa46e623c56 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -46,30 +46,6 @@
           CopyToOutputDirectory="Never"
           Visible="false"
     />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
     <None Include="$(NativeBuildOutputDir)\onnxruntime.lib"
           PackagePath="\runtimes\win-$(TargetArchitecture)\native"
           Pack="true"
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 53d587c58adbd..6d6bb4b5a8045 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -660,12 +660,13 @@ private void TestModelSequenceOfMapStringFloat()
         private void TestModelSerialization()
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
-
+            string modelOutputPath = Path.Combine(Directory.GetCurrentDirectory(), "optimized-squeezenet.onnx");
             // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx"));
+            options.SetOptimizedModelFilePath(modelOutputPath);
             options.SetSessionGraphOptimizationLevel(1);
-            var session = new InferenceSession(modelPath, options);
+            new InferenceSession(modelPath, options);
+            Assert.True(File.Exists(modelOutputPath));
         }
 
         [GpuFact]
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index e29c6cf3f8850..6545612a2eb4c 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -389,6 +389,8 @@ void addObjectMethods(py::module& m) {
 Set this option to false if you don't want it. Default is True.)pbdoc")
       .def_readwrite("enable_profiling", &SessionOptions::enable_profiling,
                      R"pbdoc(Enable profiling for this session. Default is false.)pbdoc")
+      .def_readwrite("optimized_model_filepath", &SessionOptions::optimized_model_filepath,
+                     R"pbdoc(File path to serialize optimized model. By default, optimized model is not serialized if optimized_model_filepath is not provided.)pbdoc")
       .def_readwrite("enable_mem_pattern", &SessionOptions::enable_mem_pattern,
                      R"pbdoc(Enable the memory pattern optimization. Default is true.)pbdoc")
       .def_readwrite("enable_sequential_execution", &SessionOptions::enable_sequential_execution,
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index 72d763af7da3d..88fb47d3b0990 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -31,8 +31,16 @@ def run_model(self, session_object, run_options):
         output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
         np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
 
+    def testModelSerialization(self):
+        so = onnxrt.SessionOptions()
+        so.session_log_verbosity_level = 1
+        so.session_logid = "TestModelSerialization"
+        so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
+        onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
+        self.assertTrue(os.path.isfile(so.optimized_model_filepath))
+
     def testRunModel(self):
         sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx"))
         x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
         input_name = sess.get_inputs()[0].name
         self.assertEqual(input_name, "X")

From 8b656a234d4998d13c117734774760f427eaae16 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 15:43:45 -0700
Subject: [PATCH 06/23] Model serialization

---
 .../Microsoft.ML.OnnxRuntime.csproj           | 24 ++++++++++
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs |  3 ++
 .../SessionOptions.cs                         |  9 ++++
 .../InferenceTest.cs                          | 13 ++++-
 .../core/session/onnxruntime_c_api.h          |  3 ++
 .../core/session/onnxruntime_cxx_api.h        |  2 +
 .../core/session/onnxruntime_cxx_inline.h     |  5 ++
 onnxruntime/core/providers/cpu/symbols.txt    |  2 +
 .../core/session/abi_session_options.cc       |  6 +++
 onnxruntime/core/session/inference_session.cc |  5 ++
 onnxruntime/core/session/inference_session.h  |  3 ++
 .../test/framework/inference_session_test.cc  | 47 +++++++++++++++++++
 onnxruntime/test/perftest/ort_test_session.cc |  2 +
 13 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index bffa46e623c56..0bc9a0ed2dffc 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -46,6 +46,30 @@
           CopyToOutputDirectory="Never"
           Visible="false"
     />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
+    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
+          PackagePath="\build\native\include"
+          Pack="true"
+          CopyToOutputDirectory="Never"
+          Visible="false"
+    />
     <None Include="$(NativeBuildOutputDir)\onnxruntime.lib"
           PackagePath="\runtimes\win-$(TargetArchitecture)\native"
           Pack="true"
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index 7fd2c33c6a6cb..e0bf31d76fd23 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -130,6 +130,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         [DllImport(nativeLib, CharSet = charSet)]
         public static extern IntPtr /*(OrtStatus*)*/ OrtDisableSequentialExecution(IntPtr /*(OrtSessionOptions*)*/ options);
 
+        [DllImport(nativeLib, CharSet = charSet)]
+        public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, string optimizedModelFilepath);
+
         [DllImport(nativeLib, CharSet = charSet)]
         public static extern IntPtr /*(OrtStatus*)*/ OrtEnableProfiling(IntPtr /* OrtSessionOptions* */ options, string profilePathPrefix);
 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
index 4ce708687ef79..13a334e0fd865 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -37,6 +37,15 @@ public void SetSessionGraphOptimizationLevel(uint optimization_level)
             NativeApiStatus.VerifySuccess(NativeMethods.OrtSetSessionGraphOptimizationLevel(_nativePtr, optimization_level));
         }
 
+        /// <summary>
+        ///  Set filepath to save optimized model after graph level transformations.
+        /// </summary>
+        /// <param name="optimizedModelFilepath">File path for saving optimized model.</param>
+        public void SetOptimizedModelFilePath(string optimizedModelFilepath)
+        {
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtSetOptimizedModelFilePath(_nativePtr, optimizedModelFilepath));
+        }
+
         /// <summary>
         /// Enable Sequential Execution. By default, it is enabled.
         /// </summary>
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 88bf5f83d4c8f..4bcbe21d37cfd 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -638,6 +638,17 @@ private void TestModelSequenceOfMapStringFloat()
             }
         }
 
+        [Fact]
+        private void TestModelSerialization()
+        {
+            string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
+
+            // Set the graph optimization level for this session.
+            SessionOptions options = new SessionOptions();
+            options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx");
+            var session = new InferenceSession(modelPath, options);
+        }
+
         [GpuFact]
         private void TestGpu()
         {
@@ -678,7 +689,7 @@ private void VerifyNativeMethodsExist()
             "OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling",
             "OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena",
             "OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel",
-            "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
+            "OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
             "OrtCreateDefaultAllocator","OrtAllocatorFree","OrtAllocatorGetInfo",
             "OrtCreateTensorWithDataAsOrtValue","OrtGetTensorMutableData", "OrtReleaseAllocatorInfo",
             "OrtCastTypeInfoToTensorInfo","OrtGetTensorTypeAndShape","OrtGetTensorElementType","OrtGetDimensionsCount",
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index c0c5cb8ef2ecd..c3023f59221dd 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -201,6 +201,9 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess,
  */
 ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output);
 
+// Set filepath to save optimized model after graph level transformations.
+ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath);
+
 // create a copy of an existing OrtSessionOptions
 ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output);
 ORT_API_STATUS(OrtEnableSequentialExecution, _In_ OrtSessionOptions* options);
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 8cd8dea637c15..067ea1bcdcf2f 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -139,6 +139,8 @@ struct SessionOptions : Base<OrtSessionOptions> {
   SessionOptions& EnableCpuMemArena();
   SessionOptions& DisableCpuMemArena();
 
+  SessionOptions& SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_file);
+
   SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix);
   SessionOptions& DisableProfiling();
 
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index ccb7af2616b05..a81a0293868c2 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -137,6 +137,11 @@ inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(uint32_t graph_
   return *this;
 }
 
+inline SessionOptions& SessionOptions::SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath) {
+  ORT_THROW_ON_ERROR(OrtSetOptimizedModelFilePath(p_, optimized_model_filepath));
+  return *this;
+}
+
 inline SessionOptions& SessionOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) {
   ORT_THROW_ON_ERROR(OrtEnableProfiling(p_, profile_file_prefix));
   return *this;
diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt
index fc4859442c667..e78e3ac6b71af 100644
--- a/onnxruntime/core/providers/cpu/symbols.txt
+++ b/onnxruntime/core/providers/cpu/symbols.txt
@@ -81,6 +81,8 @@ OrtSetDimensions
 OrtSetSessionGraphOptimizationLevel
 OrtSetSessionLogId
 OrtSetSessionLogVerbosityLevel
+OrtSetSessionGraphOptimizationLevel
+OrtSetOptimizedModelFilePath
 OrtSetSessionThreadPoolSize
 OrtSetTensorElementType
 OrtTensorProtoToOrtValue
diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc
index aeaab0b2488da..8027e99ec21fd 100644
--- a/onnxruntime/core/session/abi_session_options.cc
+++ b/onnxruntime/core/session/abi_session_options.cc
@@ -44,6 +44,12 @@ ORT_API_STATUS_IMPL(OrtDisableSequentialExecution, _In_ OrtSessionOptions* optio
   return nullptr;
 }
 
+// set filepath to save optimized onnx model.
+ORT_API_STATUS_IMPL(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath) {
+  options->value.optimized_model_filepath = optimized_model_filepath;
+  return nullptr;
+}
+
 // enable profiling for this session.
 ORT_API_STATUS_IMPL(OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix) {
   options->value.enable_profiling = true;
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index c4ed8be620680..20581f6322a32 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -528,6 +528,11 @@ common::Status InferenceSession::Initialize() {
     // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs.
     ORT_RETURN_IF_ERROR(graph.Resolve());
 
+    if (!session_options_.optimized_model_filepath.empty()) {
+      // Serialize optimized onnx model.
+      Model::Save(*model_, session_options_.optimized_model_filepath);
+    }
+
     ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution));
     ORT_RETURN_IF_ERROR(session_initializer.InitializeAndSave(nullptr));
 
diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h
index fe428006b4f0a..9b835eaa449d3 100644
--- a/onnxruntime/core/session/inference_session.h
+++ b/onnxruntime/core/session/inference_session.h
@@ -56,6 +56,9 @@ struct SessionOptions {
   // enable profiling for this session.
   bool enable_profiling = false;
 
+  // non empty filepath enables serialization of the transformed optimized model to the specified filepath.
+  std::basic_string<ORTCHAR_T> optimized_model_filepath;
+
   // enable the memory pattern optimization.
   // The idea is if the input shapes are the same, we could trace the internal memory allocation
   // and generate a memory pattern for future request. So next time we could just do one allocation
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 1c4cdad4f867e..0c63e3c6e165e 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -330,6 +330,53 @@ TEST(InferenceSessionTests, DisableCPUArena) {
   RunModel(session_object, run_options);
 }
 
+TEST(InferenceSessionTests, TestModelSerialization) {
+  // Load model with level1 tranform level and serialize model the after transformation.
+  SessionOptions so;
+  const string test_model = "testdata/transform/abs-id-max.onnx";
+  so.session_logid = "InferenceSessionTests.TestModelSerialization";
+  so.enable_cpu_mem_arena = false;
+  so.graph_optimization_level = TransformerLevel::Level1;
+  so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so.graph_optimization_level));
+  InferenceSession session_object{so, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object.Initialize().IsOK());
+
+ // Serialize model to the same file path again to make sure that rewrite doesn't fail.
+  InferenceSession overwrite_session_object{so, &DefaultLoggingManager()};
+  ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK());
+  ASSERT_TRUE(overwrite_session_object.Initialize().IsOK());
+
+  // Load serialized model with no tranform level and serialize model.
+  SessionOptions so_opt;
+  so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
+  so_opt.enable_cpu_mem_arena = false;
+  so_opt.graph_optimization_level = TransformerLevel::Default;
+  so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so_opt.graph_optimization_level));
+  InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK());
+  ASSERT_TRUE(session_object_opt.Initialize().IsOK());
+  
+  // Assert that refeed of optimized model results with default transform level results 
+  // in same runtime model as mlnet_encoder.onnx with TransformLevel-1.
+  std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary);
+  ASSERT_TRUE(model_fs_session1.good());
+  std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary);
+  ASSERT_TRUE(model_fs_session2.good());
+  ASSERT_TRUE(model_fs_session1.tellg() == model_fs_session2.tellg());
+  model_fs_session1.seekg(0, std::ifstream::beg);
+  model_fs_session2.seekg(0, std::ifstream::beg);
+  ASSERT_TRUE(std::equal(std::istreambuf_iterator<char>(model_fs_session1.rdbuf()),
+                         std::istreambuf_iterator<char>(),
+                         std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));
+
+  // Execute with empty optimized model filepath doesn't fail loading.
+  so_opt.optimized_model_filepath = L"";
+  InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK());
+}
+
 #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS
 static bool Compare(const InputDefList& f_arg, const InputDefList& s_arg) {
   if (f_arg.size() != s_arg.size()) {
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 906ba5693ea70..a8a50dc72cea6 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -90,6 +90,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size);
   // Set optimization level.
   session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level);
+  session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx");
+
   if (!performance_test_config.run_config.profile_file.empty())
     session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str());
   session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options);

From 35f5b90a1253858d0c9b33b1f4c4ba7bcb475193 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 20:48:42 -0700
Subject: [PATCH 07/23] Removed duplicate symbol

---
 onnxruntime/core/providers/cpu/symbols.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt
index e78e3ac6b71af..98e700f837f46 100644
--- a/onnxruntime/core/providers/cpu/symbols.txt
+++ b/onnxruntime/core/providers/cpu/symbols.txt
@@ -81,7 +81,6 @@ OrtSetDimensions
 OrtSetSessionGraphOptimizationLevel
 OrtSetSessionLogId
 OrtSetSessionLogVerbosityLevel
-OrtSetSessionGraphOptimizationLevel
 OrtSetOptimizedModelFilePath
 OrtSetSessionThreadPoolSize
 OrtSetTensorElementType

From 0220e7d8071bd276d1b0e1eddf1b64fd0d69034f Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 4 Jul 2019 22:16:08 -0700
Subject: [PATCH 08/23] Minor update

---
 csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 4bcbe21d37cfd..f3e5059020cd0 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -643,9 +643,10 @@ private void TestModelSerialization()
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
 
-            // Set the graph optimization level for this session.
+            // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx");
+            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx"));
+            options.SetSessionGraphOptimizationLevel(1);
             var session = new InferenceSession(modelPath, options);
         }
 

From 96d9345405fe6933cc6e91228ef875f4efe23f80 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Fri, 5 Jul 2019 23:59:51 +0000
Subject: [PATCH 09/23] Merged PR 1106437: Model Serialization in onnxruntime


From d82f78e9422077148f53603eb6cbfd9ce09a6222 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Fri, 5 Jul 2019 17:00:50 -0700
Subject: [PATCH 10/23] Review comments

---
 csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 3 ++-
 include/onnxruntime/core/session/onnxruntime_c_api.h        | 2 +-
 onnxruntime/test/framework/inference_session_test.cc        | 3 ++-
 onnxruntime/test/perftest/ort_test_session.cc               | 2 --
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index f3e5059020cd0..0c96f5d2781cc 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -645,9 +645,10 @@ private void TestModelSerialization()
 
             // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx"));
+            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform-Level1.onnx"));
             options.SetSessionGraphOptimizationLevel(1);
             var session = new InferenceSession(modelPath, options);
+            Assert.NotNull(session);
         }
 
         [GpuFact]
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index c3023f59221dd..62b1f1051bf8f 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -202,7 +202,7 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess,
 ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output);
 
 // Set filepath to save optimized model after graph level transformations.
-ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath);
+ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath);
 
 // create a copy of an existing OrtSessionOptions
 ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output);
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 0c63e3c6e165e..117c2807d740a 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -331,7 +331,8 @@ TEST(InferenceSessionTests, DisableCPUArena) {
 }
 
 TEST(InferenceSessionTests, TestModelSerialization) {
-  // Load model with level1 tranform level and serialize model the after transformation.
+  // Load model with level1 transform level as session options 
+  // and serialize the model after transformation.
   SessionOptions so;
   const string test_model = "testdata/transform/abs-id-max.onnx";
   so.session_logid = "InferenceSessionTests.TestModelSerialization";
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index a8a50dc72cea6..906ba5693ea70 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -90,8 +90,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size);
   // Set optimization level.
   session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level);
-  session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx");
-
   if (!performance_test_config.run_config.profile_file.empty())
     session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str());
   session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options);

From 09000312348f35b65fb0b5a4170efb0548d6c591 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Sat, 6 Jul 2019 00:01:33 +0000
Subject: [PATCH 11/23] Merged PR 1107226: Review comments

Review comments

From 57d4f5aa57bfc414f2b9582588d54fdf5dbb956e Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 22 Jul 2019 06:03:43 -0700
Subject: [PATCH 12/23] add tests

---
 .../Microsoft.ML.OnnxRuntime.csproj           | 24 -------------------
 .../InferenceTest.cs                          |  5 ++--
 .../python/onnxruntime_pybind_state.cc        |  2 ++
 .../test/python/onnxruntime_test_python.py    |  9 ++++++-
 4 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
index 0bc9a0ed2dffc..bffa46e623c56 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj
@@ -46,30 +46,6 @@
           CopyToOutputDirectory="Never"
           Visible="false"
     />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_api.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
-    <None Include="$(OnnxRuntimeCsharpRoot)\..\include\onnxruntime\core\session\onnxruntime_cxx_inline.h"
-          PackagePath="\build\native\include"
-          Pack="true"
-          CopyToOutputDirectory="Never"
-          Visible="false"
-    />
     <None Include="$(NativeBuildOutputDir)\onnxruntime.lib"
           PackagePath="\runtimes\win-$(TargetArchitecture)\native"
           Pack="true"
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 0c96f5d2781cc..471d7fb267ff0 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -642,13 +642,14 @@ private void TestModelSequenceOfMapStringFloat()
         private void TestModelSerialization()
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
-
+            string modelOutputPath = Path.Combine(Directory.GetCurrentDirectory(), "optimized-squeezenet.onnx");
             // Set the optimized model file path to assert that no exception are thrown.
             SessionOptions options = new SessionOptions();
-            options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform-Level1.onnx"));
+            options.SetOptimizedModelFilePath(modelOutputPath);
             options.SetSessionGraphOptimizationLevel(1);
             var session = new InferenceSession(modelPath, options);
             Assert.NotNull(session);
+            Assert.True(File.Exists(modelOutputPath));
         }
 
         [GpuFact]
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index e29c6cf3f8850..6545612a2eb4c 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -389,6 +389,8 @@ void addObjectMethods(py::module& m) {
 Set this option to false if you don't want it. Default is True.)pbdoc")
       .def_readwrite("enable_profiling", &SessionOptions::enable_profiling,
                      R"pbdoc(Enable profiling for this session. Default is false.)pbdoc")
+      .def_readwrite("optimized_model_filepath", &SessionOptions::optimized_model_filepath,
+                     R"pbdoc(File path to serialize optimized model. By default, optimized model is not serialized if optimized_model_filepath is not provided.)pbdoc")
       .def_readwrite("enable_mem_pattern", &SessionOptions::enable_mem_pattern,
                      R"pbdoc(Enable the memory pattern optimization. Default is true.)pbdoc")
       .def_readwrite("enable_sequential_execution", &SessionOptions::enable_sequential_execution,
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index 4a9deced107ab..d2cf3fe63aacf 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -34,8 +34,15 @@ def run_model(self, session_object, run_options):
         np.testing.assert_allclose(
             output_expected, res[0], rtol=1e-05, atol=1e-08)
 
+    def testModelSerialization(self):
+        so = onnxrt.SessionOptions()
+        so.session_log_verbosity_level = 1
+        so.session_logid = "TestModelSerialization"
+        so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
+        onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
+        self.assertTrue(os.path.isfile(so.optimized_model_filepath))
+
     def testRunModel(self):
-        sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx"))
         x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
         input_name = sess.get_inputs()[0].name
         self.assertEqual(input_name, "X")

From f0b8a1e55e6341ce4af23506d94957a0b19306a4 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 22 Jul 2019 13:48:22 -0700
Subject: [PATCH 13/23] Fixed merge conflict

---
 onnxruntime/test/python/onnxruntime_test_python.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index d2cf3fe63aacf..efdbb7443ad3a 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -42,6 +42,14 @@ def testModelSerialization(self):
         onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
         self.assertTrue(os.path.isfile(so.optimized_model_filepath))
 
+    def testModelSerialization(self):
+        so = onnxrt.SessionOptions()
+        so.session_log_verbosity_level = 1
+        so.session_logid = "TestModelSerialization"
+        so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
+        onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
+        self.assertTrue(os.path.isfile(so.optimized_model_filepath))
+
     def testRunModel(self):
         x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
         input_name = sess.get_inputs()[0].name

From b5e45d3c2ae26f049838ff2bb8577a7b180b940e Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 22 Jul 2019 23:01:14 -0700
Subject: [PATCH 14/23] Correct python tests

---
 .../test/python/onnxruntime_test_python.py      | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index 65b833d9e06fe..bfd8d5d7c898a 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -42,23 +42,8 @@ def testModelSerialization(self):
         onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
         self.assertTrue(os.path.isfile(so.optimized_model_filepath))
 
-    def testModelSerialization(self):
-        so = onnxrt.SessionOptions()
-        so.session_log_verbosity_level = 1
-        so.session_logid = "TestModelSerialization"
-        so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
-        onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
-        self.assertTrue(os.path.isfile(so.optimized_model_filepath))
-
-    def testModelSerialization(self):
-        so = onnxrt.SessionOptions()
-        so.session_log_verbosity_level = 1
-        so.session_logid = "TestModelSerialization"
-        so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
-        onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
-        self.assertTrue(os.path.isfile(so.optimized_model_filepath))
-
     def testRunModel(self):
+        sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx"))
         x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
         input_name = sess.get_inputs()[0].name
         self.assertEqual(input_name, "X")

From e30213d3d193cc7e7636cf4ff10798d9b9c13df3 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 5 Aug 2019 11:51:57 -0700
Subject: [PATCH 15/23] InferenceSession Refeed Test

---
 onnxruntime/core/session/inference_session.cc |  2 +-
 .../test/framework/inference_session_test.cc  | 39 ++++++++++++++++---
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 2b7fad42ba8c1..68e7e72ea5802 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -512,7 +512,7 @@ common::Status InferenceSession::Initialize() {
 
     if (!session_options_.optimized_model_filepath.empty()) {
       // Serialize optimized onnx model.
-      Model::Save(*model_, session_options_.optimized_model_filepath);
+      ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
     }
 
     ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution));
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 61c17e48b4766..31ab2cf558f37 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -120,6 +120,18 @@ class FuseExecutionProvider : public IExecutionProvider {
   }
 };
 
+// InferenceSession wrapper to expose loaded graph.
+class InferenceSessionGetGraphWrapper : public InferenceSession {
+ public:
+  explicit InferenceSessionGetGraphWrapper(const SessionOptions& session_options,
+                                          logging::LoggingManager* logging_manager) : InferenceSession(session_options, logging_manager) {
+  }
+
+  const Graph& GetGraph() {
+    return model_->MainGraph();
+  }
+};
+
 namespace test {
 static void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
                           const std::vector<float>& expected_values);
@@ -339,13 +351,28 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   const string test_model = "testdata/transform/abs-id-max.onnx";
   so.session_logid = "InferenceSessionTests.TestModelSerialization";
   so.enable_cpu_mem_arena = false;
+  so.graph_optimization_level = TransformerLevel::Default;
+  InferenceSessionGetGraphWrapper session_object_noopt{so, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object_noopt.Initialize().IsOK());
+
+  // Assert that model has been transformed.
+  const auto& graph_noopt = session_object_noopt.GetGraph();
+  std::map<std::string, int> op_to_count_noopt = CountOpsInGraph(graph_noopt);
+  ASSERT_TRUE(op_to_count_noopt["Identity"] > 0);
+
   so.graph_optimization_level = TransformerLevel::Level1;
-  so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so.graph_optimization_level));
-  InferenceSession session_object{so, &DefaultLoggingManager()};
+  so.optimized_model_filepath = ToWideString(test_model + "-TransformLevel-" + std::to_string(static_cast<uint32_t>(so.graph_optimization_level)));
+  InferenceSessionGetGraphWrapper session_object{so, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object.Load(test_model).IsOK());
   ASSERT_TRUE(session_object.Initialize().IsOK());
+ 
+  // Assert that model has been transformed and identify Node is removed.
+  const auto& graph = session_object.GetGraph();
+  std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
+  ASSERT_TRUE(op_to_count["Identity"] == 0);
 
- // Serialize model to the same file path again to make sure that rewrite doesn't fail.
+  // Serialize model to the same file path again to make sure that rewrite doesn't fail.
   InferenceSession overwrite_session_object{so, &DefaultLoggingManager()};
   ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK());
   ASSERT_TRUE(overwrite_session_object.Initialize().IsOK());
@@ -355,13 +382,13 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
   so_opt.enable_cpu_mem_arena = false;
   so_opt.graph_optimization_level = TransformerLevel::Default;
-  so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast<uint32_t>(so_opt.graph_optimization_level));
+  so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + ToWideString("-TransformLevel-" + std::to_string(static_cast<uint32_t>(so_opt.graph_optimization_level)));
   InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK());
   ASSERT_TRUE(session_object_opt.Initialize().IsOK());
   
-  // Assert that refeed of optimized model results with default transform level results 
-  // in same runtime model as mlnet_encoder.onnx with TransformLevel-1.
+  // Assert that refeed of optimized model with default transform level results
+  // in same runtime model as abs-id-max.onnx with TransformLevel-1.
   std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary);
   ASSERT_TRUE(model_fs_session1.good());
   std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary);

From f47922a7d2fdd591cddecdc0da9074493041d2d3 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Mon, 5 Aug 2019 13:09:05 -0700
Subject: [PATCH 16/23] Replace use of widechar const literal-L

---
 onnxruntime/test/framework/inference_session_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 04333927a73a9..f46f70ac06ad1 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -399,7 +399,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
                          std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));
 
   // Execute with empty optimized model filepath doesn't fail loading.
-  so_opt.optimized_model_filepath = L"";
+  so_opt.optimized_model_filepath = ToWideString("");
   InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());
   ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK());

From f500ee7891ff64ac1d4317dbe8e09d1c921ae6c8 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Tue, 6 Aug 2019 00:30:27 -0700
Subject: [PATCH 17/23] Fixed failing tests

---
 csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index e0bf31d76fd23..a525a30862ab8 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -131,7 +131,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
         public static extern IntPtr /*(OrtStatus*)*/ OrtDisableSequentialExecution(IntPtr /*(OrtSessionOptions*)*/ options);
 
         [DllImport(nativeLib, CharSet = charSet)]
-        public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, string optimizedModelFilepath);
+        public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, [MarshalAs(UnmanagedType.LPWStr)]string optimizedModelFilepath);
 
         [DllImport(nativeLib, CharSet = charSet)]
         public static extern IntPtr /*(OrtStatus*)*/ OrtEnableProfiling(IntPtr /* OrtSessionOptions* */ options, string profilePathPrefix);

From 6b938d9c851c26d8dc1191558618e5e1f64e2522 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Tue, 6 Aug 2019 15:45:57 -0700
Subject: [PATCH 18/23] Updated comment

---
 onnxruntime/test/framework/inference_session_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index f46f70ac06ad1..cab9b2aaf1173 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -354,7 +354,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK());
   ASSERT_TRUE(session_object_noopt.Initialize().IsOK());
 
-  // Assert that model has been transformed.
+  // Assert that model has been Identity Nodes.
   const auto& graph_noopt = session_object_noopt.GetGraph();
   std::map<std::string, int> op_to_count_noopt = CountOpsInGraph(graph_noopt);
   ASSERT_TRUE(op_to_count_noopt["Identity"] > 0);

From 88f56a61e1103473cfbb5aa0bd5085eeeca14c82 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Wed, 7 Aug 2019 15:13:43 -0700
Subject: [PATCH 19/23] Removed unnecessary session options

---
 onnxruntime/test/framework/inference_session_test.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index cab9b2aaf1173..b883c7c0bfd6a 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -348,7 +348,6 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   SessionOptions so;
   const string test_model = "testdata/transform/abs-id-max.onnx";
   so.session_logid = "InferenceSessionTests.TestModelSerialization";
-  so.enable_cpu_mem_arena = false;
   so.graph_optimization_level = TransformerLevel::Default;
   InferenceSessionGetGraphWrapper session_object_noopt{so, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK());
@@ -378,7 +377,6 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   // Load serialized model with no tranform level and serialize model.
   SessionOptions so_opt;
   so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
-  so_opt.enable_cpu_mem_arena = false;
   so_opt.graph_optimization_level = TransformerLevel::Default;
   so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + ToWideString("-TransformLevel-" + std::to_string(static_cast<uint32_t>(so_opt.graph_optimization_level)));
   InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()};

From 05fbef55de92e2a23f5b9b00c35e536fdbb61c08 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Wed, 7 Aug 2019 15:56:09 -0700
Subject: [PATCH 20/23] Spell check on comments

---
 onnxruntime/core/session/inference_session.cc     |  2 +-
 .../test/framework/inference_session_test.cc      | 15 ++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 8e0dee067b712..60e2bd7af0c2d 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -529,7 +529,7 @@ common::Status InferenceSession::Initialize() {
     ORT_RETURN_IF_ERROR(graph.Resolve());
 
     if (!session_options_.optimized_model_filepath.empty()) {
-      // Serialize optimized onnx model.
+      // Serialize optimized ONNX model.
       ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
     }
 
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index b883c7c0bfd6a..9f9ee292a4198 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -343,8 +343,8 @@ TEST(InferenceSessionTests, DisableCPUArena) {
 }
 
 TEST(InferenceSessionTests, TestModelSerialization) {
-  // Load model with level1 transform level as session options 
-  // and serialize the model after transformation.
+  // Load model with level 0 transform level
+  // and assert that the model has Identity nodes.
   SessionOptions so;
   const string test_model = "testdata/transform/abs-id-max.onnx";
   so.session_logid = "InferenceSessionTests.TestModelSerialization";
@@ -353,18 +353,19 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK());
   ASSERT_TRUE(session_object_noopt.Initialize().IsOK());
 
-  // Assert that model has been Identity Nodes.
+  // Assert that model has Identity Nodes.
   const auto& graph_noopt = session_object_noopt.GetGraph();
   std::map<std::string, int> op_to_count_noopt = CountOpsInGraph(graph_noopt);
   ASSERT_TRUE(op_to_count_noopt["Identity"] > 0);
 
+  // Load model with level 1 transform level.
   so.graph_optimization_level = TransformerLevel::Level1;
   so.optimized_model_filepath = ToWideString(test_model + "-TransformLevel-" + std::to_string(static_cast<uint32_t>(so.graph_optimization_level)));
   InferenceSessionGetGraphWrapper session_object{so, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object.Load(test_model).IsOK());
   ASSERT_TRUE(session_object.Initialize().IsOK());
  
-  // Assert that model has been transformed and identify Node is removed.
+  // Assert that model has been transformed and identity Node is removed.
   const auto& graph = session_object.GetGraph();
   std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
   ASSERT_TRUE(op_to_count["Identity"] == 0);
@@ -374,7 +375,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK());
   ASSERT_TRUE(overwrite_session_object.Initialize().IsOK());
 
-  // Load serialized model with no tranform level and serialize model.
+  // Load serialized model with no transform level and serialize model.
   SessionOptions so_opt;
   so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
   so_opt.graph_optimization_level = TransformerLevel::Default;
@@ -383,7 +384,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
   ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK());
   ASSERT_TRUE(session_object_opt.Initialize().IsOK());
   
-  // Assert that refeed of optimized model with default transform level results
+  // Assert that re-feed of optimized model with default transform level results
   // in same runtime model as abs-id-max.onnx with TransformLevel-1.
   std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary);
   ASSERT_TRUE(model_fs_session1.good());
@@ -396,7 +397,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
                          std::istreambuf_iterator<char>(),
                          std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));
 
-  // Execute with empty optimized model filepath doesn't fail loading.
+  // Execute with empty optimized model file-path doesn't fail loading.
   so_opt.optimized_model_filepath = ToWideString("");
   InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());

From 318491cdb1f99c89b7a72e95e842e4088a0ecd96 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Wed, 7 Aug 2019 19:17:18 -0700
Subject: [PATCH 21/23] Do not serialize when level 3 optimization specified

---
 onnxruntime/core/session/inference_session.cc        |  2 +-
 onnxruntime/test/framework/inference_session_test.cc | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 60e2bd7af0c2d..e3d1a27187fd3 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -528,7 +528,7 @@ common::Status InferenceSession::Initialize() {
     // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs.
     ORT_RETURN_IF_ERROR(graph.Resolve());
 
-    if (!session_options_.optimized_model_filepath.empty()) {
+    if (session_options_.graph_optimization_level < TransformerLevel::Level3 && !session_options_.optimized_model_filepath.empty()) {
       // Serialize optimized ONNX model.
       ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
     }
diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc
index 9f9ee292a4198..9d4f2e419371c 100644
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@@ -397,11 +397,20 @@ TEST(InferenceSessionTests, TestModelSerialization) {
                          std::istreambuf_iterator<char>(),
                          std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));
 
-  // Execute with empty optimized model file-path doesn't fail loading.
+  // Assert that empty optimized model file-path doesn't fail loading.
   so_opt.optimized_model_filepath = ToWideString("");
   InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
   ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());
   ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK());
+
+  // Assert that level 3 optimization doesn't result in serialized model.
+  so_opt.optimized_model_filepath = ToWideString("ShouldNotSerialize");
+  so_opt.graph_optimization_level = TransformerLevel::Level3;
+  InferenceSession session_object_Level3Test{so_opt, &DefaultLoggingManager()};
+  ASSERT_TRUE(session_object_Level3Test.Load(test_model).IsOK());
+  ASSERT_TRUE(session_object_Level3Test.Initialize().IsOK());
+  std::ifstream model_fs_Level3(so_opt.optimized_model_filepath, ios::in | ios::binary);
+  ASSERT_TRUE(model_fs_Level3.fail());
 }
 
 #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS

From 779bcd02470996b90772c303a300e2d386eb11ec Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Thu, 8 Aug 2019 16:39:38 -0700
Subject: [PATCH 22/23] Updated error logs

---
 onnxruntime/core/session/inference_session.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index e3d1a27187fd3..5c87efa467ca6 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -528,9 +528,14 @@ common::Status InferenceSession::Initialize() {
     // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs.
     ORT_RETURN_IF_ERROR(graph.Resolve());
 
-    if (session_options_.graph_optimization_level < TransformerLevel::Level3 && !session_options_.optimized_model_filepath.empty()) {
-      // Serialize optimized ONNX model.
-      ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
+    if (!session_options_.optimized_model_filepath.empty()) {
+      if (session_options_.graph_optimization_level < TransformerLevel::Level3) {
+        // Serialize optimized ONNX model.
+        ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
+      } else {
+        LOGS(*session_logger_, ERROR) << "Serializing Optimized ONNX model with Graph Optimization"
+                                        " level greater than 2 is not supported.";
+      }
     }
 
     ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution));

From ba6ce3a8bdbc36067e826e4754c6b2db5c348da0 Mon Sep 17 00:00:00 2001
From: Pulkit Tomar <putomar@microsoft.com>
Date: Fri, 9 Aug 2019 12:06:06 -0700
Subject: [PATCH 23/23] Changed log severity to WARN

---
 onnxruntime/core/session/inference_session.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 5c87efa467ca6..ed097d11f1482 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -533,7 +533,7 @@ common::Status InferenceSession::Initialize() {
         // Serialize optimized ONNX model.
         ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
       } else {
-        LOGS(*session_logger_, ERROR) << "Serializing Optimized ONNX model with Graph Optimization"
+        LOGS(*session_logger_, WARNING) << "Serializing Optimized ONNX model with Graph Optimization"
                                         " level greater than 2 is not supported.";
       }
     }