From ac4309a1ed95e869cf15321ba2f78e856c1df27e Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 4 Jul 2019 15:43:45 -0700 Subject: [PATCH 01/23] Model serialization --- .../Microsoft.ML.OnnxRuntime.csproj | 24 ++++++++++ .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 3 ++ .../SessionOptions.cs | 9 ++++ .../InferenceTest.cs | 13 ++++- .../core/session/onnxruntime_c_api.h | 3 ++ .../core/session/onnxruntime_cxx_api.h | 2 + .../core/session/onnxruntime_cxx_inline.h | 5 ++ onnxruntime/core/providers/cpu/symbols.txt | 2 + .../core/session/abi_session_options.cc | 6 +++ onnxruntime/core/session/inference_session.cc | 5 ++ onnxruntime/core/session/inference_session.h | 3 ++ .../test/framework/inference_session_test.cc | 47 +++++++++++++++++++ onnxruntime/test/perftest/ort_test_session.cc | 2 + 13 files changed, 123 insertions(+), 1 deletion(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index bffa46e623c56..0bc9a0ed2dffc 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -46,6 +46,30 @@ CopyToOutputDirectory="Never" Visible="false" /> + + + + + /// Set filepath to save optimized model after graph level transformations. + /// + /// File path for saving optimized model. + public void SetOptimizedModelFilePath(string optimizedModelFilepath) + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtSetOptimizedModelFilePath(_nativePtr, optimizedModelFilepath)); + } + /// /// Enable Sequential Execution. By default, it is enabled. /// diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 31c0ee67433fa..b406d21106b6c 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -656,6 +656,17 @@ private void TestModelSequenceOfMapStringFloat() } } + [Fact] + private void TestModelSerialization() + { + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); + + // Set the graph optimization level for this session. + SessionOptions options = new SessionOptions(); + options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx"); + var session = new InferenceSession(modelPath, options); + } + [GpuFact] private void TestGpu() { @@ -696,7 +707,7 @@ private void VerifyNativeMethodsExist() "OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling", "OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena", "OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel", - "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo", + "OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo", "OrtCreateDefaultAllocator","OrtAllocatorFree","OrtAllocatorGetInfo", "OrtCreateTensorWithDataAsOrtValue","OrtGetTensorMutableData", "OrtReleaseAllocatorInfo", "OrtCastTypeInfoToTensorInfo","OrtGetTensorTypeAndShape","OrtGetTensorElementType","OrtGetDimensionsCount", diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 03bf3a4467df3..8c34399334f85 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -201,6 +201,9 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess, */ ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output); +// Set filepath to save optimized model after graph level transformations. +ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath); + // create a copy of an existing OrtSessionOptions ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output); ORT_API_STATUS(OrtEnableSequentialExecution, _In_ OrtSessionOptions* options); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index df15d2d2ecde6..7956b5146e48e 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -139,6 +139,8 @@ struct SessionOptions : Base { SessionOptions& EnableCpuMemArena(); SessionOptions& DisableCpuMemArena(); + SessionOptions& SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_file); + SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix); SessionOptions& DisableProfiling(); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 970155aeaa383..39b7856084b25 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -137,6 +137,11 @@ inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(uint32_t graph_ return *this; } +inline SessionOptions& SessionOptions::SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath) { + ORT_THROW_ON_ERROR(OrtSetOptimizedModelFilePath(p_, optimized_model_filepath)); + return *this; +} + inline SessionOptions& SessionOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) { ORT_THROW_ON_ERROR(OrtEnableProfiling(p_, profile_file_prefix)); return *this; diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt index 265b10260b342..658ffcf928d93 100644 --- a/onnxruntime/core/providers/cpu/symbols.txt +++ b/onnxruntime/core/providers/cpu/symbols.txt @@ -80,6 +80,8 @@ OrtSetDimensions OrtSetSessionGraphOptimizationLevel OrtSetSessionLogId OrtSetSessionLogVerbosityLevel +OrtSetSessionGraphOptimizationLevel +OrtSetOptimizedModelFilePath OrtSetSessionThreadPoolSize OrtSetTensorElementType OrtTensorProtoToOrtValue diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc index aeaab0b2488da..8027e99ec21fd 100644 --- a/onnxruntime/core/session/abi_session_options.cc +++ b/onnxruntime/core/session/abi_session_options.cc @@ -44,6 +44,12 @@ ORT_API_STATUS_IMPL(OrtDisableSequentialExecution, _In_ OrtSessionOptions* optio return nullptr; } +// set filepath to save optimized onnx model. +ORT_API_STATUS_IMPL(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath) { + options->value.optimized_model_filepath = optimized_model_filepath; + return nullptr; +} + // enable profiling for this session. ORT_API_STATUS_IMPL(OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix) { options->value.enable_profiling = true; diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index b421855f4e8aa..2b7fad42ba8c1 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -510,6 +510,11 @@ common::Status InferenceSession::Initialize() { // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs. ORT_RETURN_IF_ERROR(graph.Resolve()); + if (!session_options_.optimized_model_filepath.empty()) { + // Serialize optimized onnx model. + Model::Save(*model_, session_options_.optimized_model_filepath); + } + ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution)); ORT_RETURN_IF_ERROR(session_initializer.InitializeAndSave(nullptr)); diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index d1a7a57f046df..ae0ee2ae56185 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -56,6 +56,9 @@ struct SessionOptions { // enable profiling for this session. bool enable_profiling = false; + // non empty filepath enables serialization of the transformed optimized model to the specified filepath. + std::basic_string optimized_model_filepath; + // enable the memory pattern optimization. // The idea is if the input shapes are the same, we could trace the internal memory allocation // and generate a memory pattern for future request. So next time we could just do one allocation diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 66e5f9c0bfed0..c7879432c6593 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -332,6 +332,53 @@ TEST(InferenceSessionTests, DisableCPUArena) { RunModel(session_object, run_options); } +TEST(InferenceSessionTests, TestModelSerialization) { + // Load model with level1 tranform level and serialize model the after transformation. + SessionOptions so; + const string test_model = "testdata/transform/abs-id-max.onnx"; + so.session_logid = "InferenceSessionTests.TestModelSerialization"; + so.enable_cpu_mem_arena = false; + so.graph_optimization_level = TransformerLevel::Level1; + so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast(so.graph_optimization_level)); + InferenceSession session_object{so, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object.Load(test_model).IsOK()); + ASSERT_TRUE(session_object.Initialize().IsOK()); + + // Serialize model to the same file path again to make sure that rewrite doesn't fail. + InferenceSession overwrite_session_object{so, &DefaultLoggingManager()}; + ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK()); + ASSERT_TRUE(overwrite_session_object.Initialize().IsOK()); + + // Load serialized model with no tranform level and serialize model. + SessionOptions so_opt; + so_opt.session_logid = "InferenceSessionTests.TestModelSerialization"; + so_opt.enable_cpu_mem_arena = false; + so_opt.graph_optimization_level = TransformerLevel::Default; + so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast(so_opt.graph_optimization_level)); + InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK()); + ASSERT_TRUE(session_object_opt.Initialize().IsOK()); + + // Assert that refeed of optimized model results with default transform level results + // in same runtime model as mlnet_encoder.onnx with TransformLevel-1. + std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary); + ASSERT_TRUE(model_fs_session1.good()); + std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary); + ASSERT_TRUE(model_fs_session2.good()); + ASSERT_TRUE(model_fs_session1.tellg() == model_fs_session2.tellg()); + model_fs_session1.seekg(0, std::ifstream::beg); + model_fs_session2.seekg(0, std::ifstream::beg); + ASSERT_TRUE(std::equal(std::istreambuf_iterator(model_fs_session1.rdbuf()), + std::istreambuf_iterator(), + std::istreambuf_iterator(model_fs_session2.rdbuf()))); + + // Execute with empty optimized model filepath doesn't fail loading. + so_opt.optimized_model_filepath = L""; + InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); + ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK()); +} + #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS static bool Compare(const InputDefList& f_arg, const InputDefList& s_arg) { if (f_arg.size() != s_arg.size()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 906ba5693ea70..a8a50dc72cea6 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -90,6 +90,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size); // Set optimization level. session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); + session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx"); + if (!performance_test_config.run_config.profile_file.empty()) session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); From 48ae9eac74d43529a0986a4541ebc9601e9e1672 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 4 Jul 2019 20:48:42 -0700 Subject: [PATCH 02/23] Removed duplicate symbol --- onnxruntime/core/providers/cpu/symbols.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt index 658ffcf928d93..c02dade6da2a0 100644 --- a/onnxruntime/core/providers/cpu/symbols.txt +++ b/onnxruntime/core/providers/cpu/symbols.txt @@ -80,7 +80,6 @@ OrtSetDimensions OrtSetSessionGraphOptimizationLevel OrtSetSessionLogId OrtSetSessionLogVerbosityLevel -OrtSetSessionGraphOptimizationLevel OrtSetOptimizedModelFilePath OrtSetSessionThreadPoolSize OrtSetTensorElementType From b4d09aa87db33d8e54d046f520facd621c41108a Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 4 Jul 2019 22:16:08 -0700 Subject: [PATCH 03/23] Minor update --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index b406d21106b6c..53d587c58adbd 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -661,9 +661,10 @@ private void TestModelSerialization() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - // Set the graph optimization level for this session. + // Set the optimized model file path to assert that no exception are thrown. SessionOptions options = new SessionOptions(); - options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx"); + options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx")); + options.SetSessionGraphOptimizationLevel(1); var session = new InferenceSession(modelPath, options); } From 0d0b6fc9b8c7491d8c3c682e692271c73d28de9e Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Fri, 5 Jul 2019 17:00:50 -0700 Subject: [PATCH 04/23] Review comments --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 3 ++- include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- onnxruntime/test/framework/inference_session_test.cc | 3 ++- onnxruntime/test/perftest/ort_test_session.cc | 2 -- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 53d587c58adbd..0af28907d00d8 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -663,9 +663,10 @@ private void TestModelSerialization() // Set the optimized model file path to assert that no exception are thrown. SessionOptions options = new SessionOptions(); - options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx")); + options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform-Level1.onnx")); options.SetSessionGraphOptimizationLevel(1); var session = new InferenceSession(modelPath, options); + Assert.NotNull(session); } [GpuFact] diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 8c34399334f85..1211109875757 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -202,7 +202,7 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess, ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output); // Set filepath to save optimized model after graph level transformations. -ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath); +ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath); // create a copy of an existing OrtSessionOptions ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output); diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index c7879432c6593..61c17e48b4766 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -333,7 +333,8 @@ TEST(InferenceSessionTests, DisableCPUArena) { } TEST(InferenceSessionTests, TestModelSerialization) { - // Load model with level1 tranform level and serialize model the after transformation. + // Load model with level1 transform level as session options + // and serialize the model after transformation. SessionOptions so; const string test_model = "testdata/transform/abs-id-max.onnx"; so.session_logid = "InferenceSessionTests.TestModelSerialization"; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a8a50dc72cea6..906ba5693ea70 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -90,8 +90,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size); // Set optimization level. session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); - session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx"); - if (!performance_test_config.run_config.profile_file.empty()) session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); From 523b0f08ea2ee5b5f671b4bc572c7a29a1be66f5 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Mon, 22 Jul 2019 06:03:43 -0700 Subject: [PATCH 05/23] add tests --- .../Microsoft.ML.OnnxRuntime.csproj | 24 ------------------- .../InferenceTest.cs | 7 +++--- .../python/onnxruntime_pybind_state.cc | 2 ++ .../test/python/onnxruntime_test_python.py | 9 ++++++- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 0bc9a0ed2dffc..bffa46e623c56 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -46,30 +46,6 @@ CopyToOutputDirectory="Never" Visible="false" /> - - - - Date: Thu, 4 Jul 2019 15:43:45 -0700 Subject: [PATCH 06/23] Model serialization --- .../Microsoft.ML.OnnxRuntime.csproj | 24 ++++++++++ .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 3 ++ .../SessionOptions.cs | 9 ++++ .../InferenceTest.cs | 13 ++++- .../core/session/onnxruntime_c_api.h | 3 ++ .../core/session/onnxruntime_cxx_api.h | 2 + .../core/session/onnxruntime_cxx_inline.h | 5 ++ onnxruntime/core/providers/cpu/symbols.txt | 2 + .../core/session/abi_session_options.cc | 6 +++ onnxruntime/core/session/inference_session.cc | 5 ++ onnxruntime/core/session/inference_session.h | 3 ++ .../test/framework/inference_session_test.cc | 47 +++++++++++++++++++ onnxruntime/test/perftest/ort_test_session.cc | 2 + 13 files changed, 123 insertions(+), 1 deletion(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index bffa46e623c56..0bc9a0ed2dffc 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -46,6 +46,30 @@ CopyToOutputDirectory="Never" Visible="false" /> + + + + + /// Set filepath to save optimized model after graph level transformations. + /// + /// File path for saving optimized model. + public void SetOptimizedModelFilePath(string optimizedModelFilepath) + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtSetOptimizedModelFilePath(_nativePtr, optimizedModelFilepath)); + } + /// /// Enable Sequential Execution. By default, it is enabled. /// diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 88bf5f83d4c8f..4bcbe21d37cfd 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -638,6 +638,17 @@ private void TestModelSequenceOfMapStringFloat() } } + [Fact] + private void TestModelSerialization() + { + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); + + // Set the graph optimization level for this session. + SessionOptions options = new SessionOptions(); + options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx"); + var session = new InferenceSession(modelPath, options); + } + [GpuFact] private void TestGpu() { @@ -678,7 +689,7 @@ private void VerifyNativeMethodsExist() "OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling", "OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena", "OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel", - "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo", + "OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo", "OrtCreateDefaultAllocator","OrtAllocatorFree","OrtAllocatorGetInfo", "OrtCreateTensorWithDataAsOrtValue","OrtGetTensorMutableData", "OrtReleaseAllocatorInfo", "OrtCastTypeInfoToTensorInfo","OrtGetTensorTypeAndShape","OrtGetTensorElementType","OrtGetDimensionsCount", diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index c0c5cb8ef2ecd..c3023f59221dd 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -201,6 +201,9 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess, */ ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output); +// Set filepath to save optimized model after graph level transformations. +ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath); + // create a copy of an existing OrtSessionOptions ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output); ORT_API_STATUS(OrtEnableSequentialExecution, _In_ OrtSessionOptions* options); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 8cd8dea637c15..067ea1bcdcf2f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -139,6 +139,8 @@ struct SessionOptions : Base { SessionOptions& EnableCpuMemArena(); SessionOptions& DisableCpuMemArena(); + SessionOptions& SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_file); + SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix); SessionOptions& DisableProfiling(); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index ccb7af2616b05..a81a0293868c2 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -137,6 +137,11 @@ inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(uint32_t graph_ return *this; } +inline SessionOptions& SessionOptions::SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath) { + ORT_THROW_ON_ERROR(OrtSetOptimizedModelFilePath(p_, optimized_model_filepath)); + return *this; +} + inline SessionOptions& SessionOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) { ORT_THROW_ON_ERROR(OrtEnableProfiling(p_, profile_file_prefix)); return *this; diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt index fc4859442c667..e78e3ac6b71af 100644 --- a/onnxruntime/core/providers/cpu/symbols.txt +++ b/onnxruntime/core/providers/cpu/symbols.txt @@ -81,6 +81,8 @@ OrtSetDimensions OrtSetSessionGraphOptimizationLevel OrtSetSessionLogId OrtSetSessionLogVerbosityLevel +OrtSetSessionGraphOptimizationLevel +OrtSetOptimizedModelFilePath OrtSetSessionThreadPoolSize OrtSetTensorElementType OrtTensorProtoToOrtValue diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc index aeaab0b2488da..8027e99ec21fd 100644 --- a/onnxruntime/core/session/abi_session_options.cc +++ b/onnxruntime/core/session/abi_session_options.cc @@ -44,6 +44,12 @@ ORT_API_STATUS_IMPL(OrtDisableSequentialExecution, _In_ OrtSessionOptions* optio return nullptr; } +// set filepath to save optimized onnx model. +ORT_API_STATUS_IMPL(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath) { + options->value.optimized_model_filepath = optimized_model_filepath; + return nullptr; +} + // enable profiling for this session. ORT_API_STATUS_IMPL(OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix) { options->value.enable_profiling = true; diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index c4ed8be620680..20581f6322a32 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -528,6 +528,11 @@ common::Status InferenceSession::Initialize() { // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs. ORT_RETURN_IF_ERROR(graph.Resolve()); + if (!session_options_.optimized_model_filepath.empty()) { + // Serialize optimized onnx model. + Model::Save(*model_, session_options_.optimized_model_filepath); + } + ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution)); ORT_RETURN_IF_ERROR(session_initializer.InitializeAndSave(nullptr)); diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index fe428006b4f0a..9b835eaa449d3 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -56,6 +56,9 @@ struct SessionOptions { // enable profiling for this session. bool enable_profiling = false; + // non empty filepath enables serialization of the transformed optimized model to the specified filepath. + std::basic_string optimized_model_filepath; + // enable the memory pattern optimization. // The idea is if the input shapes are the same, we could trace the internal memory allocation // and generate a memory pattern for future request. So next time we could just do one allocation diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 1c4cdad4f867e..0c63e3c6e165e 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -330,6 +330,53 @@ TEST(InferenceSessionTests, DisableCPUArena) { RunModel(session_object, run_options); } +TEST(InferenceSessionTests, TestModelSerialization) { + // Load model with level1 tranform level and serialize model the after transformation. + SessionOptions so; + const string test_model = "testdata/transform/abs-id-max.onnx"; + so.session_logid = "InferenceSessionTests.TestModelSerialization"; + so.enable_cpu_mem_arena = false; + so.graph_optimization_level = TransformerLevel::Level1; + so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast(so.graph_optimization_level)); + InferenceSession session_object{so, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object.Load(test_model).IsOK()); + ASSERT_TRUE(session_object.Initialize().IsOK()); + + // Serialize model to the same file path again to make sure that rewrite doesn't fail. + InferenceSession overwrite_session_object{so, &DefaultLoggingManager()}; + ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK()); + ASSERT_TRUE(overwrite_session_object.Initialize().IsOK()); + + // Load serialized model with no tranform level and serialize model. + SessionOptions so_opt; + so_opt.session_logid = "InferenceSessionTests.TestModelSerialization"; + so_opt.enable_cpu_mem_arena = false; + so_opt.graph_optimization_level = TransformerLevel::Default; + so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast(so_opt.graph_optimization_level)); + InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK()); + ASSERT_TRUE(session_object_opt.Initialize().IsOK()); + + // Assert that refeed of optimized model results with default transform level results + // in same runtime model as mlnet_encoder.onnx with TransformLevel-1. + std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary); + ASSERT_TRUE(model_fs_session1.good()); + std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary); + ASSERT_TRUE(model_fs_session2.good()); + ASSERT_TRUE(model_fs_session1.tellg() == model_fs_session2.tellg()); + model_fs_session1.seekg(0, std::ifstream::beg); + model_fs_session2.seekg(0, std::ifstream::beg); + ASSERT_TRUE(std::equal(std::istreambuf_iterator(model_fs_session1.rdbuf()), + std::istreambuf_iterator(), + std::istreambuf_iterator(model_fs_session2.rdbuf()))); + + // Execute with empty optimized model filepath doesn't fail loading. + so_opt.optimized_model_filepath = L""; + InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); + ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK()); +} + #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS static bool Compare(const InputDefList& f_arg, const InputDefList& s_arg) { if (f_arg.size() != s_arg.size()) { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 906ba5693ea70..a8a50dc72cea6 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -90,6 +90,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size); // Set optimization level. session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); + session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx"); + if (!performance_test_config.run_config.profile_file.empty()) session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); From 35f5b90a1253858d0c9b33b1f4c4ba7bcb475193 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 4 Jul 2019 20:48:42 -0700 Subject: [PATCH 07/23] Removed duplicate symbol --- onnxruntime/core/providers/cpu/symbols.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/providers/cpu/symbols.txt b/onnxruntime/core/providers/cpu/symbols.txt index e78e3ac6b71af..98e700f837f46 100644 --- a/onnxruntime/core/providers/cpu/symbols.txt +++ b/onnxruntime/core/providers/cpu/symbols.txt @@ -81,7 +81,6 @@ OrtSetDimensions OrtSetSessionGraphOptimizationLevel OrtSetSessionLogId OrtSetSessionLogVerbosityLevel -OrtSetSessionGraphOptimizationLevel OrtSetOptimizedModelFilePath OrtSetSessionThreadPoolSize OrtSetTensorElementType From 0220e7d8071bd276d1b0e1eddf1b64fd0d69034f Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 4 Jul 2019 22:16:08 -0700 Subject: [PATCH 08/23] Minor update --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 4bcbe21d37cfd..f3e5059020cd0 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -643,9 +643,10 @@ private void TestModelSerialization() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - // Set the graph optimization level for this session. + // Set the optimized model file path to assert that no exception are thrown. SessionOptions options = new SessionOptions(); - options.SetOptimizedModelFilePath("squeezenet-Transform1.onnx"); + options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx")); + options.SetSessionGraphOptimizationLevel(1); var session = new InferenceSession(modelPath, options); } From 96d9345405fe6933cc6e91228ef875f4efe23f80 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Fri, 5 Jul 2019 23:59:51 +0000 Subject: [PATCH 09/23] Merged PR 1106437: Model Serialization in onnxruntime From d82f78e9422077148f53603eb6cbfd9ce09a6222 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Fri, 5 Jul 2019 17:00:50 -0700 Subject: [PATCH 10/23] Review comments --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 3 ++- include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- onnxruntime/test/framework/inference_session_test.cc | 3 ++- onnxruntime/test/perftest/ort_test_session.cc | 2 -- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index f3e5059020cd0..0c96f5d2781cc 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -645,9 +645,10 @@ private void TestModelSerialization() // Set the optimized model file path to assert that no exception are thrown. SessionOptions options = new SessionOptions(); - options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform1.onnx")); + options.SetOptimizedModelFilePath(Path.Combine(Directory.GetCurrentDirectory(), "squeezenet-Transform-Level1.onnx")); options.SetSessionGraphOptimizationLevel(1); var session = new InferenceSession(modelPath, options); + Assert.NotNull(session); } [GpuFact] diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index c3023f59221dd..62b1f1051bf8f 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -202,7 +202,7 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess, ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output); // Set filepath to save optimized model after graph level transformations. -ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* in, _In_ const ORTCHAR_T* optimized_model_filepath); +ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath); // create a copy of an existing OrtSessionOptions ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output); diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 0c63e3c6e165e..117c2807d740a 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -331,7 +331,8 @@ TEST(InferenceSessionTests, DisableCPUArena) { } TEST(InferenceSessionTests, TestModelSerialization) { - // Load model with level1 tranform level and serialize model the after transformation. + // Load model with level1 transform level as session options + // and serialize the model after transformation. SessionOptions so; const string test_model = "testdata/transform/abs-id-max.onnx"; so.session_logid = "InferenceSessionTests.TestModelSerialization"; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index a8a50dc72cea6..906ba5693ea70 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -90,8 +90,6 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device session_options.SetThreadPoolSize(performance_test_config.run_config.session_thread_pool_size); // Set optimization level. session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); - session_options.SetOptimizedModelFilePath(L"temp-optimized-model.onnx"); - if (!performance_test_config.run_config.profile_file.empty()) session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); session_ = Ort::Session(env, performance_test_config.model_info.model_file_path.c_str(), session_options); From 09000312348f35b65fb0b5a4170efb0548d6c591 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Sat, 6 Jul 2019 00:01:33 +0000 Subject: [PATCH 11/23] Merged PR 1107226: Review comments Review comments From 57d4f5aa57bfc414f2b9582588d54fdf5dbb956e Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Mon, 22 Jul 2019 06:03:43 -0700 Subject: [PATCH 12/23] add tests --- .../Microsoft.ML.OnnxRuntime.csproj | 24 ------------------- .../InferenceTest.cs | 5 ++-- .../python/onnxruntime_pybind_state.cc | 2 ++ .../test/python/onnxruntime_test_python.py | 9 ++++++- 4 files changed, 13 insertions(+), 27 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj index 0bc9a0ed2dffc..bffa46e623c56 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj @@ -46,30 +46,6 @@ CopyToOutputDirectory="Never" Visible="false" /> - - - - Date: Mon, 22 Jul 2019 13:48:22 -0700 Subject: [PATCH 13/23] Fixed merge conflict --- onnxruntime/test/python/onnxruntime_test_python.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index d2cf3fe63aacf..efdbb7443ad3a 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -42,6 +42,14 @@ def testModelSerialization(self): onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so) self.assertTrue(os.path.isfile(so.optimized_model_filepath)) + def testModelSerialization(self): + so = onnxrt.SessionOptions() + so.session_log_verbosity_level = 1 + so.session_logid = "TestModelSerialization" + so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx" + onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so) + self.assertTrue(os.path.isfile(so.optimized_model_filepath)) + def testRunModel(self): x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32) input_name = sess.get_inputs()[0].name From b5e45d3c2ae26f049838ff2bb8577a7b180b940e Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Mon, 22 Jul 2019 23:01:14 -0700 Subject: [PATCH 14/23] Correct python tests --- .../test/python/onnxruntime_test_python.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 65b833d9e06fe..bfd8d5d7c898a 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -42,23 +42,8 @@ def testModelSerialization(self): onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so) self.assertTrue(os.path.isfile(so.optimized_model_filepath)) - def testModelSerialization(self): - so = onnxrt.SessionOptions() - so.session_log_verbosity_level = 1 - so.session_logid = "TestModelSerialization" - so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx" - onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so) - self.assertTrue(os.path.isfile(so.optimized_model_filepath)) - - def testModelSerialization(self): - so = onnxrt.SessionOptions() - so.session_log_verbosity_level = 1 - so.session_logid = "TestModelSerialization" - so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx" - onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so) - self.assertTrue(os.path.isfile(so.optimized_model_filepath)) - def testRunModel(self): + sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx")) x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32) input_name = sess.get_inputs()[0].name self.assertEqual(input_name, "X") From e30213d3d193cc7e7636cf4ff10798d9b9c13df3 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Mon, 5 Aug 2019 11:51:57 -0700 Subject: [PATCH 15/23] InferenceSesssion Refeed Test --- onnxruntime/core/session/inference_session.cc | 2 +- .../test/framework/inference_session_test.cc | 39 ++++++++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 2b7fad42ba8c1..68e7e72ea5802 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -512,7 +512,7 @@ common::Status InferenceSession::Initialize() { if (!session_options_.optimized_model_filepath.empty()) { // Serialize optimized onnx model. - Model::Save(*model_, session_options_.optimized_model_filepath); + ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); } ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution)); diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 61c17e48b4766..31ab2cf558f37 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -120,6 +120,18 @@ class FuseExecutionProvider : public IExecutionProvider { } }; +// InferenceSession wrapper to expose loaded graph. +class InferenceSessionGetGraphWrapper : public InferenceSession { + public: + explicit InferenceSessionGetGraphWrapper(const SessionOptions& session_options, + logging::LoggingManager* logging_manager) : InferenceSession(session_options, logging_manager) { + } + + const Graph& GetGraph() { + return model_->MainGraph(); + } +}; + namespace test { static void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, const std::vector& expected_values); @@ -339,13 +351,28 @@ TEST(InferenceSessionTests, TestModelSerialization) { const string test_model = "testdata/transform/abs-id-max.onnx"; so.session_logid = "InferenceSessionTests.TestModelSerialization"; so.enable_cpu_mem_arena = false; + so.graph_optimization_level = TransformerLevel::Default; + InferenceSessionGetGraphWrapper session_object_noopt{so, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK()); + ASSERT_TRUE(session_object_noopt.Initialize().IsOK()); + + // Assert that model has been transformed. + const auto& graph_noopt = session_object_noopt.GetGraph(); + std::map op_to_count_noopt = CountOpsInGraph(graph_noopt); + ASSERT_TRUE(op_to_count_noopt["Identity"] > 0); + so.graph_optimization_level = TransformerLevel::Level1; - so.optimized_model_filepath = ToWideString(test_model) + L"-TransformLevel-" + std::to_wstring(static_cast(so.graph_optimization_level)); - InferenceSession session_object{so, &DefaultLoggingManager()}; + so.optimized_model_filepath = ToWideString(test_model + "-TransformLevel-" + std::to_string(static_cast(so.graph_optimization_level))); + InferenceSessionGetGraphWrapper session_object{so, &DefaultLoggingManager()}; ASSERT_TRUE(session_object.Load(test_model).IsOK()); ASSERT_TRUE(session_object.Initialize().IsOK()); + + // Assert that model has been transformed and identify Node is removed. + const auto& graph = session_object.GetGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Identity"] == 0); - // Serialize model to the same file path again to make sure that rewrite doesn't fail. + // Serialize model to the same file path again to make sure that rewrite doesn't fail. InferenceSession overwrite_session_object{so, &DefaultLoggingManager()}; ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK()); ASSERT_TRUE(overwrite_session_object.Initialize().IsOK()); @@ -355,13 +382,13 @@ TEST(InferenceSessionTests, TestModelSerialization) { so_opt.session_logid = "InferenceSessionTests.TestModelSerialization"; so_opt.enable_cpu_mem_arena = false; so_opt.graph_optimization_level = TransformerLevel::Default; - so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + L"-TransformLevel-" + std::to_wstring(static_cast(so_opt.graph_optimization_level)); + so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + ToWideString("-TransformLevel-" + std::to_string(static_cast(so_opt.graph_optimization_level))); InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK()); ASSERT_TRUE(session_object_opt.Initialize().IsOK()); - // Assert that refeed of optimized model results with default transform level results - // in same runtime model as mlnet_encoder.onnx with TransformLevel-1. + // Assert that refeed of optimized model with default transform level results + // in same runtime model as abs-id-max.onnx with TransformLevel-1. std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary); ASSERT_TRUE(model_fs_session1.good()); std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary); From f47922a7d2fdd591cddecdc0da9074493041d2d3 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Mon, 5 Aug 2019 13:09:05 -0700 Subject: [PATCH 16/23] Replace use of widechar const literal-L --- onnxruntime/test/framework/inference_session_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 04333927a73a9..f46f70ac06ad1 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -399,7 +399,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { std::istreambuf_iterator(model_fs_session2.rdbuf()))); // Execute with empty optimized model filepath doesn't fail loading. - so_opt.optimized_model_filepath = L""; + so_opt.optimized_model_filepath = ToWideString(""); InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK()); From f500ee7891ff64ac1d4317dbe8e09d1c921ae6c8 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Tue, 6 Aug 2019 00:30:27 -0700 Subject: [PATCH 17/23] Fixed failing tests --- csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index e0bf31d76fd23..a525a30862ab8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -131,7 +131,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca public static extern IntPtr /*(OrtStatus*)*/ OrtDisableSequentialExecution(IntPtr /*(OrtSessionOptions*)*/ options); [DllImport(nativeLib, CharSet = charSet)] - public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, string optimizedModelFilepath); + public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, [MarshalAs(UnmanagedType.LPWStr)]string optimizedModelFilepath); [DllImport(nativeLib, CharSet = charSet)] public static extern IntPtr /*(OrtStatus*)*/ OrtEnableProfiling(IntPtr /* OrtSessionOptions* */ options, string profilePathPrefix); From 6b938d9c851c26d8dc1191558618e5e1f64e2522 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Tue, 6 Aug 2019 15:45:57 -0700 Subject: [PATCH 18/23] Updated comment --- onnxruntime/test/framework/inference_session_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index f46f70ac06ad1..cab9b2aaf1173 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -354,7 +354,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK()); ASSERT_TRUE(session_object_noopt.Initialize().IsOK()); - // Assert that model has been transformed. + // Assert that model has been Identity Nodes. const auto& graph_noopt = session_object_noopt.GetGraph(); std::map op_to_count_noopt = CountOpsInGraph(graph_noopt); ASSERT_TRUE(op_to_count_noopt["Identity"] > 0); From 88f56a61e1103473cfbb5aa0bd5085eeeca14c82 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Wed, 7 Aug 2019 15:13:43 -0700 Subject: [PATCH 19/23] Removed unnecessary session options --- onnxruntime/test/framework/inference_session_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index cab9b2aaf1173..b883c7c0bfd6a 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -348,7 +348,6 @@ TEST(InferenceSessionTests, TestModelSerialization) { SessionOptions so; const string test_model = "testdata/transform/abs-id-max.onnx"; so.session_logid = "InferenceSessionTests.TestModelSerialization"; - so.enable_cpu_mem_arena = false; so.graph_optimization_level = TransformerLevel::Default; InferenceSessionGetGraphWrapper session_object_noopt{so, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK()); @@ -378,7 +377,6 @@ TEST(InferenceSessionTests, TestModelSerialization) { // Load serialized model with no tranform level and serialize model. SessionOptions so_opt; so_opt.session_logid = "InferenceSessionTests.TestModelSerialization"; - so_opt.enable_cpu_mem_arena = false; so_opt.graph_optimization_level = TransformerLevel::Default; so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + ToWideString("-TransformLevel-" + std::to_string(static_cast(so_opt.graph_optimization_level))); InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()}; From 05fbef55de92e2a23f5b9b00c35e536fdbb61c08 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Wed, 7 Aug 2019 15:56:09 -0700 Subject: [PATCH 20/23] Spell check on comments --- onnxruntime/core/session/inference_session.cc | 2 +- .../test/framework/inference_session_test.cc | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 8e0dee067b712..60e2bd7af0c2d 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -529,7 +529,7 @@ common::Status InferenceSession::Initialize() { ORT_RETURN_IF_ERROR(graph.Resolve()); if (!session_options_.optimized_model_filepath.empty()) { - // Serialize optimized onnx model. + // Serialize optimized ONNX model. ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); } diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index b883c7c0bfd6a..9f9ee292a4198 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -343,8 +343,8 @@ TEST(InferenceSessionTests, DisableCPUArena) { } TEST(InferenceSessionTests, TestModelSerialization) { - // Load model with level1 transform level as session options - // and serialize the model after transformation. + // Load model with level 0 transform level + // and assert that the model has Identity nodes. SessionOptions so; const string test_model = "testdata/transform/abs-id-max.onnx"; so.session_logid = "InferenceSessionTests.TestModelSerialization"; @@ -353,18 +353,19 @@ TEST(InferenceSessionTests, TestModelSerialization) { ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK()); ASSERT_TRUE(session_object_noopt.Initialize().IsOK()); - // Assert that model has been Identity Nodes. + // Assert that model has Identity Nodes. const auto& graph_noopt = session_object_noopt.GetGraph(); std::map op_to_count_noopt = CountOpsInGraph(graph_noopt); ASSERT_TRUE(op_to_count_noopt["Identity"] > 0); + // Load model with level 1 transform level. so.graph_optimization_level = TransformerLevel::Level1; so.optimized_model_filepath = ToWideString(test_model + "-TransformLevel-" + std::to_string(static_cast(so.graph_optimization_level))); InferenceSessionGetGraphWrapper session_object{so, &DefaultLoggingManager()}; ASSERT_TRUE(session_object.Load(test_model).IsOK()); ASSERT_TRUE(session_object.Initialize().IsOK()); - // Assert that model has been transformed and identify Node is removed. + // Assert that model has been transformed and identity Node is removed. const auto& graph = session_object.GetGraph(); std::map op_to_count = CountOpsInGraph(graph); ASSERT_TRUE(op_to_count["Identity"] == 0); @@ -374,7 +375,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK()); ASSERT_TRUE(overwrite_session_object.Initialize().IsOK()); - // Load serialized model with no tranform level and serialize model. + // Load serialized model with no transform level and serialize model. SessionOptions so_opt; so_opt.session_logid = "InferenceSessionTests.TestModelSerialization"; so_opt.graph_optimization_level = TransformerLevel::Default; @@ -383,7 +384,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK()); ASSERT_TRUE(session_object_opt.Initialize().IsOK()); - // Assert that refeed of optimized model with default transform level results + // Assert that re-feed of optimized model with default transform level results // in same runtime model as abs-id-max.onnx with TransformLevel-1. std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary); ASSERT_TRUE(model_fs_session1.good()); @@ -396,7 +397,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { std::istreambuf_iterator(), std::istreambuf_iterator(model_fs_session2.rdbuf()))); - // Execute with empty optimized model filepath doesn't fail loading. + // Execute with empty optimized model file-path doesn't fail loading. so_opt.optimized_model_filepath = ToWideString(""); InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); From 318491cdb1f99c89b7a72e95e842e4088a0ecd96 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Wed, 7 Aug 2019 19:17:18 -0700 Subject: [PATCH 21/23] Do not serialize when level 3 optimization specified --- onnxruntime/core/session/inference_session.cc | 2 +- onnxruntime/test/framework/inference_session_test.cc | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 60e2bd7af0c2d..e3d1a27187fd3 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -528,7 +528,7 @@ common::Status InferenceSession::Initialize() { // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs. ORT_RETURN_IF_ERROR(graph.Resolve()); - if (!session_options_.optimized_model_filepath.empty()) { + if (session_options_.graph_optimization_level < TransformerLevel::Level3 && !session_options_.optimized_model_filepath.empty()) { // Serialize optimized ONNX model. ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); } diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 9f9ee292a4198..9d4f2e419371c 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -397,11 +397,20 @@ TEST(InferenceSessionTests, TestModelSerialization) { std::istreambuf_iterator(), std::istreambuf_iterator(model_fs_session2.rdbuf()))); - // Execute with empty optimized model file-path doesn't fail loading. + // Assert that empty optimized model file-path doesn't fail loading. so_opt.optimized_model_filepath = ToWideString(""); InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK()); + + // Assert that level 3 optimization doesn't result in serialized model. + so_opt.optimized_model_filepath = ToWideString("ShouldNotSerialize"); + so_opt.graph_optimization_level = TransformerLevel::Level3; + InferenceSession session_object_Level3Test{so_opt, &DefaultLoggingManager()}; + ASSERT_TRUE(session_object_Level3Test.Load(test_model).IsOK()); + ASSERT_TRUE(session_object_Level3Test.Initialize().IsOK()); + std::ifstream model_fs_Level3(so_opt.optimized_model_filepath, ios::in | ios::binary); + ASSERT_TRUE(model_fs_Level3.fail()); } #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS From 779bcd02470996b90772c303a300e2d386eb11ec Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Thu, 8 Aug 2019 16:39:38 -0700 Subject: [PATCH 22/23] Updated error logs --- onnxruntime/core/session/inference_session.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index e3d1a27187fd3..5c87efa467ca6 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -528,9 +528,14 @@ common::Status InferenceSession::Initialize() { // now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs. ORT_RETURN_IF_ERROR(graph.Resolve()); - if (session_options_.graph_optimization_level < TransformerLevel::Level3 && !session_options_.optimized_model_filepath.empty()) { - // Serialize optimized ONNX model. - ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); + if (!session_options_.optimized_model_filepath.empty()) { + if (session_options_.graph_optimization_level < TransformerLevel::Level3) { + // Serialize optimized ONNX model. + ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); + } else { + LOGS(*session_logger_, ERROR) << "Serializing Optimized ONNX model with Graph Optimization" + " level greater than 2 is not supported."; + } } ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution)); From ba6ce3a8bdbc36067e826e4754c6b2db5c348da0 Mon Sep 17 00:00:00 2001 From: Pulkit Tomar Date: Fri, 9 Aug 2019 12:06:06 -0700 Subject: [PATCH 23/23] Changed log severity to WARN --- onnxruntime/core/session/inference_session.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 5c87efa467ca6..ed097d11f1482 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -533,7 +533,7 @@ common::Status InferenceSession::Initialize() { // Serialize optimized ONNX model. ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath)); } else { - LOGS(*session_logger_, ERROR) << "Serializing Optimized ONNX model with Graph Optimization" + LOGS(*session_logger_, WARNING) << "Serializing Optimized ONNX model with Graph Optimization" " level greater than 2 is not supported."; } }