Serialize optimized onnx model #1470

Merged · 29 commits · Aug 13, 2019
3 changes: 3 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -130,6 +130,9 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtDisableSequentialExecution(IntPtr /*(OrtSessionOptions*)*/ options);

[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, [MarshalAs(UnmanagedType.LPWStr)]string optimizedModelFilepath);

[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtEnableProfiling(IntPtr /* OrtSessionOptions* */ options, string profilePathPrefix);

9 changes: 9 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -37,6 +37,15 @@ public void SetSessionGraphOptimizationLevel(uint optimization_level)
NativeApiStatus.VerifySuccess(NativeMethods.OrtSetSessionGraphOptimizationLevel(_nativePtr, optimization_level));
}

/// <summary>
/// Set the file path at which to save the optimized model after graph-level transformations.
/// </summary>
/// <param name="optimizedModelFilepath">File path for saving the optimized model.</param>
public void SetOptimizedModelFilePath(string optimizedModelFilepath)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtSetOptimizedModelFilePath(_nativePtr, optimizedModelFilepath));
}

/// <summary>
/// Enable Sequential Execution. By default, it is enabled.
/// </summary>
16 changes: 15 additions & 1 deletion csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -638,6 +638,20 @@ private void TestModelSequenceOfMapStringFloat()
}
}

[Fact]
private void TestModelSerialization()
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");
string modelOutputPath = Path.Combine(Directory.GetCurrentDirectory(), "optimized-squeezenet.onnx");
// Set the optimized model file path to assert that no exceptions are thrown.
SessionOptions options = new SessionOptions();
options.SetOptimizedModelFilePath(modelOutputPath);
options.SetSessionGraphOptimizationLevel(1);
var session = new InferenceSession(modelPath, options);
Assert.NotNull(session);
Assert.True(File.Exists(modelOutputPath));
}

[GpuFact]
private void TestGpu()
{
@@ -678,7 +692,7 @@ private void VerifyNativeMethodsExist()
"OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling",
"OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena",
"OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel",
"OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
"OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU","OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
"OrtCreateDefaultAllocator","OrtAllocatorFree","OrtAllocatorGetInfo",
"OrtCreateTensorWithDataAsOrtValue","OrtGetTensorMutableData", "OrtReleaseAllocatorInfo",
"OrtCastTypeInfoToTensorInfo","OrtGetTensorTypeAndShape","OrtGetTensorElementType","OrtGetDimensionsCount",
3 changes: 3 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -201,6 +201,9 @@ ORT_API_STATUS(OrtRun, _Inout_ OrtSession* sess,
*/
ORT_API_STATUS(OrtCreateSessionOptions, _Out_ OrtSessionOptions** output);

// Set the file path at which to save the optimized model after graph-level transformations.
ORT_API_STATUS(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath);

// create a copy of an existing OrtSessionOptions
ORT_API_STATUS(OrtCloneSessionOptions, _In_ OrtSessionOptions* in, _Out_ OrtSessionOptions** output);
ORT_API_STATUS(OrtEnableSequentialExecution, _In_ OrtSessionOptions* options);
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -139,6 +139,8 @@ struct SessionOptions : Base<OrtSessionOptions> {
SessionOptions& EnableCpuMemArena();
SessionOptions& DisableCpuMemArena();

SessionOptions& SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath);

SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix);
SessionOptions& DisableProfiling();

5 changes: 5 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -137,6 +137,11 @@ inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(uint32_t graph_
return *this;
}

inline SessionOptions& SessionOptions::SetOptimizedModelFilePath(const ORTCHAR_T* optimized_model_filepath) {
ORT_THROW_ON_ERROR(OrtSetOptimizedModelFilePath(p_, optimized_model_filepath));
return *this;
}

inline SessionOptions& SessionOptions::EnableProfiling(const ORTCHAR_T* profile_file_prefix) {
ORT_THROW_ON_ERROR(OrtEnableProfiling(p_, profile_file_prefix));
return *this;
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cpu/symbols.txt
@@ -81,6 +81,7 @@ OrtSetDimensions
OrtSetSessionGraphOptimizationLevel
OrtSetSessionLogId
OrtSetSessionLogVerbosityLevel
OrtSetOptimizedModelFilePath
OrtSetSessionThreadPoolSize
OrtSetTensorElementType
OrtTensorProtoToOrtValue
6 changes: 6 additions & 0 deletions onnxruntime/core/session/abi_session_options.cc
@@ -44,6 +44,12 @@ ORT_API_STATUS_IMPL(OrtDisableSequentialExecution, _In_ OrtSessionOptions* optio
return nullptr;
}

// Set the file path for saving the optimized ONNX model.
ORT_API_STATUS_IMPL(OrtSetOptimizedModelFilePath, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* optimized_model_filepath) {
options->value.optimized_model_filepath = optimized_model_filepath;
return nullptr;
}

// enable profiling for this session.
ORT_API_STATUS_IMPL(OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix) {
options->value.enable_profiling = true;
10 changes: 10 additions & 0 deletions onnxruntime/core/session/inference_session.cc
@@ -528,6 +528,16 @@ common::Status InferenceSession::Initialize() {
// now that all the transforms are done, call Resolve on the main graph. this will recurse into the subgraphs.
ORT_RETURN_IF_ERROR(graph.Resolve());

if (!session_options_.optimized_model_filepath.empty()) {
if (session_options_.graph_optimization_level < TransformerLevel::Level3) {
// Serialize optimized ONNX model.
ORT_RETURN_IF_ERROR(Model::Save(*model_, session_options_.optimized_model_filepath));
} else {
LOGS(*session_logger_, WARNING) << "Serializing optimized ONNX model with graph optimization"
" level greater than 2 is not supported.";
}
}

ORT_RETURN_IF_ERROR(session_initializer.CreatePlan(nullptr, nullptr, session_options_.enable_sequential_execution));
ORT_RETURN_IF_ERROR(session_initializer.InitializeAndSave(nullptr));

3 changes: 3 additions & 0 deletions onnxruntime/core/session/inference_session.h
@@ -56,6 +56,9 @@ struct SessionOptions {
// enable profiling for this session.
bool enable_profiling = false;

// A non-empty file path enables serialization of the transformed (optimized) model to the specified path.
std::basic_string<ORTCHAR_T> optimized_model_filepath;

// enable the memory pattern optimization.
// The idea is if the input shapes are the same, we could trace the internal memory allocation
// and generate a memory pattern for future request. So next time we could just do one allocation
2 changes: 2 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
@@ -389,6 +389,8 @@ void addObjectMethods(py::module& m) {
Set this option to false if you don't want it. Default is True.)pbdoc")
.def_readwrite("enable_profiling", &SessionOptions::enable_profiling,
R"pbdoc(Enable profiling for this session. Default is false.)pbdoc")
.def_readwrite("optimized_model_filepath", &SessionOptions::optimized_model_filepath,
R"pbdoc(File path to serialize optimized model. By default, optimized model is not serialized if optimized_model_filepath is not provided.)pbdoc")
.def_readwrite("enable_mem_pattern", &SessionOptions::enable_mem_pattern,
R"pbdoc(Enable the memory pattern optimization. Default is true.)pbdoc")
.def_readwrite("enable_sequential_execution", &SessionOptions::enable_sequential_execution,
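For context, a minimal Python sketch of how the new option is used end to end, mirroring the Python test added below (the model path and output path are illustrative, not part of this change):

```python
import os
import onnxruntime as onnxrt

so = onnxrt.SessionOptions()
# Setting a non-empty path opts in to serialization of the optimized graph.
so.optimized_model_filepath = "./optimized_model.onnx"

# Graph transformations run during session initialization; the transformed
# model is written to optimized_model_filepath as a side effect.
sess = onnxrt.InferenceSession("model.onnx", sess_options=so)
assert os.path.isfile(so.optimized_model_filepath)
```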
83 changes: 83 additions & 0 deletions onnxruntime/test/framework/inference_session_test.cc
@@ -118,6 +118,18 @@ class FuseExecutionProvider : public IExecutionProvider {
}
};

// InferenceSession wrapper to expose loaded graph.
class InferenceSessionGetGraphWrapper : public InferenceSession {
public:
explicit InferenceSessionGetGraphWrapper(const SessionOptions& session_options,
logging::LoggingManager* logging_manager) : InferenceSession(session_options, logging_manager) {
}

const Graph& GetGraph() {
return model_->MainGraph();
}
};

namespace test {
static void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
const std::vector<float>& expected_values);
@@ -330,6 +342,77 @@ TEST(InferenceSessionTests, DisableCPUArena) {
RunModel(session_object, run_options);
}

TEST(InferenceSessionTests, TestModelSerialization) {
// Load the model at transform level 0 (Default)
// and assert that it contains Identity nodes.
SessionOptions so;
const string test_model = "testdata/transform/abs-id-max.onnx";
so.session_logid = "InferenceSessionTests.TestModelSerialization";
so.graph_optimization_level = TransformerLevel::Default;
InferenceSessionGetGraphWrapper session_object_noopt{so, &DefaultLoggingManager()};
ASSERT_TRUE(session_object_noopt.Load(test_model).IsOK());
ASSERT_TRUE(session_object_noopt.Initialize().IsOK());

// Assert that model has Identity Nodes.
const auto& graph_noopt = session_object_noopt.GetGraph();
std::map<std::string, int> op_to_count_noopt = CountOpsInGraph(graph_noopt);
ASSERT_TRUE(op_to_count_noopt["Identity"] > 0);

// Load model with level 1 transform level.
so.graph_optimization_level = TransformerLevel::Level1;
so.optimized_model_filepath = ToWideString(test_model + "-TransformLevel-" + std::to_string(static_cast<uint32_t>(so.graph_optimization_level)));
InferenceSessionGetGraphWrapper session_object{so, &DefaultLoggingManager()};
ASSERT_TRUE(session_object.Load(test_model).IsOK());
ASSERT_TRUE(session_object.Initialize().IsOK());

// Assert that model has been transformed and identity Node is removed.
const auto& graph = session_object.GetGraph();
std::map<std::string, int> op_to_count = CountOpsInGraph(graph);
ASSERT_TRUE(op_to_count["Identity"] == 0);

// Serialize model to the same file path again to make sure that rewrite doesn't fail.
InferenceSession overwrite_session_object{so, &DefaultLoggingManager()};
ASSERT_TRUE(overwrite_session_object.Load(test_model).IsOK());
ASSERT_TRUE(overwrite_session_object.Initialize().IsOK());

// Load serialized model with no transform level and serialize model.
SessionOptions so_opt;
so_opt.session_logid = "InferenceSessionTests.TestModelSerialization";
so_opt.graph_optimization_level = TransformerLevel::Default;
so_opt.optimized_model_filepath = ToWideString(so.optimized_model_filepath) + ToWideString("-TransformLevel-" + std::to_string(static_cast<uint32_t>(so_opt.graph_optimization_level)));
InferenceSession session_object_opt{so_opt, &DefaultLoggingManager()};
ASSERT_TRUE(session_object_opt.Load(so.optimized_model_filepath).IsOK());
ASSERT_TRUE(session_object_opt.Initialize().IsOK());

// Assert that re-feed of optimized model with default transform level results
// in same runtime model as abs-id-max.onnx with TransformLevel-1.
std::ifstream model_fs_session1(so.optimized_model_filepath, ios::in | ios::binary);
ASSERT_TRUE(model_fs_session1.good());
std::ifstream model_fs_session2(so_opt.optimized_model_filepath, ios::in | ios::binary);
ASSERT_TRUE(model_fs_session2.good());
ASSERT_TRUE(model_fs_session1.tellg() == model_fs_session2.tellg());
model_fs_session1.seekg(0, std::ifstream::beg);
model_fs_session2.seekg(0, std::ifstream::beg);
ASSERT_TRUE(std::equal(std::istreambuf_iterator<char>(model_fs_session1.rdbuf()),
std::istreambuf_iterator<char>(),
std::istreambuf_iterator<char>(model_fs_session2.rdbuf())));

// Assert that an empty optimized-model file path doesn't fail loading.
so_opt.optimized_model_filepath = ToWideString("");
InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()};
ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK());
ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK());

// Assert that level 3 optimization doesn't produce a serialized model.
so_opt.optimized_model_filepath = ToWideString("ShouldNotSerialize");
so_opt.graph_optimization_level = TransformerLevel::Level3;
InferenceSession session_object_Level3Test{so_opt, &DefaultLoggingManager()};
ASSERT_TRUE(session_object_Level3Test.Load(test_model).IsOK());
ASSERT_TRUE(session_object_Level3Test.Initialize().IsOK());
std::ifstream model_fs_Level3(so_opt.optimized_model_filepath, ios::in | ios::binary);
ASSERT_TRUE(model_fs_Level3.fail());
}

#ifdef ORT_RUN_EXTERNAL_ONNX_TESTS
static bool Compare(const InputDefList& f_arg, const InputDefList& s_arg) {
if (f_arg.size() != s_arg.size()) {
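A rough Python analogue of the round-trip portion of this C++ test, for readers following along with the bindings (file names are illustrative, and it assumes that optimization at the default level is idempotent for the model, as the test above asserts for abs-id-max.onnx):

```python
import filecmp
import onnxruntime as onnxrt

# First pass: optimize model.onnx and serialize the result.
so1 = onnxrt.SessionOptions()
so1.optimized_model_filepath = "model-opt-1.onnx"
onnxrt.InferenceSession("model.onnx", sess_options=so1)

# Second pass: re-feed the serialized optimized model and serialize again.
so2 = onnxrt.SessionOptions()
so2.optimized_model_filepath = "model-opt-2.onnx"
onnxrt.InferenceSession(so1.optimized_model_filepath, sess_options=so2)

# Re-optimizing an already optimized model should be a fixed point,
# so the two serialized files should be byte-identical.
assert filecmp.cmp(so1.optimized_model_filepath, so2.optimized_model_filepath, shallow=False)
```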
8 changes: 8 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python.py
@@ -34,6 +34,14 @@ def run_model(self, session_object, run_options):
np.testing.assert_allclose(
output_expected, res[0], rtol=1e-05, atol=1e-08)

def testModelSerialization(self):
so = onnxrt.SessionOptions()
so.session_log_verbosity_level = 1
so.session_logid = "TestModelSerialization"
so.optimized_model_filepath = "./PythonApiTestOptimizedModel.onnx"
onnxrt.InferenceSession(self.get_name("mul_1.onnx"), sess_options=so)
self.assertTrue(os.path.isfile(so.optimized_model_filepath))

def testRunModel(self):
sess = onnxrt.InferenceSession(self.get_name("mul_1.onnx"))
x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
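As a follow-up sketch, the serialized model should itself be loadable and produce the same results as the original. This assumes the mul_1.onnx test model computes the elementwise square of input "X", as exercised by testRunModel above; the output path is illustrative:

```python
import numpy as np
import onnxruntime as onnxrt

# Serialize the optimized model as a side effect of session creation.
so = onnxrt.SessionOptions()
so.optimized_model_filepath = "./mul_1-optimized.onnx"
onnxrt.InferenceSession("mul_1.onnx", sess_options=so)

# Reload the serialized optimized model and run it.
sess = onnxrt.InferenceSession(so.optimized_model_filepath)
x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
res = sess.run(None, {"X": x})
np.testing.assert_allclose(x * x, res[0], rtol=1e-05, atol=1e-08)
```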