diff --git a/.github/workflows/job_python_api_tests.yml b/.github/workflows/job_python_api_tests.yml index 541a14e2b1b6df..654d634f4f56f3 100644 --- a/.github/workflows/job_python_api_tests.yml +++ b/.github/workflows/job_python_api_tests.yml @@ -101,10 +101,10 @@ jobs: --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py - - name: Python API Tests -- numpy>=2.0.0 + - name: Python API Tests -- numpy<2.0.0 run: | python3 -m pip uninstall -y numpy - python3 -m pip install "numpy~=2.0.0" + python3 -m pip install "numpy~=1.26.0" python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt # for 'template' extension export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index c80dc388568004..6e0e21335e50c8 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -83,7 +83,7 @@ For setting up a relevant configuration, refer to the :doc:`Integrate with Customer Application <../../openvino-workflow/running-inference/integrate-openvino-with-your-application>` topic (step 3 "Configure input and output"). -.. dropdown:: Device support across OpenVINO 2024.5 distributions +.. dropdown:: Device support across OpenVINO 2024.6 distributions =============== ========== ====== =============== ======== ============ ========== ========== ========== Device Archives PyPI APT/YUM/ZYPPER Conda Homebrew vcpkg Conan npm diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 9e7673d7d0910d..a168d1c44a10c3 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -16,359 +16,407 @@ OpenVINO Release Notes -2024.5 - 20 November 2024 +2024.6 - 18 December 2024 ############################# :doc:`System Requirements <./release-notes-openvino/system-requirements>` | :doc:`Release policy <./release-notes-openvino/release-policy>` | :doc:`Installation Guides <./../get-started/install-openvino>` - - What's new +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* More GenAI coverage and framework integrations to minimize code changes. - - * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. - * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 - Mini-Instruct. - * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. - * Preview: support for Flax, a high-performance Python neural network library based on JAX. - Its modular design allows for easy customization and accelerated inference on GPUs. - -* Broader Large Language Model (LLM) support and more model compression techniques. - - * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ - Graphics include KV Cache compression for memory reduction along with improved usability, - and model load time optimizations to improve first token latency for LLMs. - * Dynamic quantization was enabled to improve first token latency for LLMs on built-in - Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). 
Second - token latency will also improve for large batch inference. - * A new method to generate synthetic text data is implemented in the Neural Network - Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using - data-aware methods without datasets. Coming soon: This feature will soon be accessible via - Optimum Intel on Hugging Face. - -* More portability and performance to run AI at the edge, in the cloud, or locally. - - * Support for - `Intel® Xeon® 6 Processors with P-cores `__ - (formerly codenamed Granite Rapids) and - `Intel® Core™ Ultra 200V series processors `__ - (formerly codenamed Arrow Lake-S). - * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines - for improved contextual awareness, transcription pipelines for easy audio-to-text - conversions, and image generation pipelines for streamlined text-to-visual conversions. - * Speculative decoding feature added to the GenAI API for improved performance and efficient - text generation using a small draft model that is periodically corrected by the full-size - model. - * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and - efficiently customize image and text generation models for specialized tasks. - * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the - target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and - whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. - -Now deprecated ------------------------------------------------------------------------------------------------ +* OpenVINO 2024.6 LTS release includes updates for enhanced stability and improved LLM performance. +* Introduced support for Intel® Arc™ B-Series Graphics (formerly known as Battlemage) +* Memory optimizations implemented to improve the inference time memory and LLM performance on NPUs. +* Improved LLM performance with GenAI API optimizations and bug fixes. -* Python 3.8 is no longer supported: OpenVINO™ Runtime +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -Common ------------------------------------------------------------------------------------------------ - -* Numpy 2.x has been adopted for all currently supported components, including NNCF. -* A new constant constructor has been added, enabling constants to be created from data pointer - as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding - a two-step process to wrap memory into ``ov::Tensor``. -* Asynchronous file reading with mmap library has been implemented, reducing loading times for - model files, especially for LLMs. -* CPU implementation of SliceScatter operator is now available, used for models such as Gemma, - supporting increased LLM performance. - - CPU Device Plugin ----------------------------------------------------------------------------------------------- -* Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) - has been reached. -* Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has - been implemented. -* LLM performance has been further improved with Rotary Position Embedding optimization; Query, - Key, and Value; and multi-layer perceptron fusion optimization. 
-* FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via - both native APIs and the vLLM integration. -* Models with LoRA adapters are now supported. - +* KV cache now uses asymmetric U8 as the default precision, reducing memory stress for LLMs and + increasing their performance. This option can be controlled by model meta data. +* Quality and accuracy has been improved for selected models with several bug fixes. GPU Device Plugin ----------------------------------------------------------------------------------------------- -* The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a - significant reduction in memory consumption, increasing performance with a minimal impact to - accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is - activated by default for non-systolic devices. -* LoRA adapters are now functionally supported on GPU. -* A new feature of GPU weightless blob caching enables caching model structure only and reusing - the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. -* Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by - default on Intel® Core™ Ultra platforms, improving LLM first token latency. - +* Device memory copy optimizations have been introduced for inference with **Intel® Arc™ B-Series + Graphics** (formerly known as Battlemage). Since it does not utilize L2 cache for copying memory + between the device and host, a dedicated `copy` operation is used, if inputs or results are + not expected in the device memory. +* ChatGLM4 inference on GPU has been optimized. NPU Device Plugin ----------------------------------------------------------------------------------------------- -* Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin - releases the cached model (blob) after weights are loaded in NPU regions. Model export is not - available in this scenario. Memory consumption is reduced during inference execution with one - blob size. This optimization requires the latest NPU driver: 32.0.100.3104. -* A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now - report 2GB as the maximum allocatable memory for any driver that does not support graph - extension 1.8. Even if older drivers report a larger amount of memory to be available, memory - allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes - for any driver that supports graph extension 1.8 (or newer). -* A new API is used to initialize the model (available in graph extension 1.8). -* Inference request set_tensors is now supported. -* ``ov::device::LUID`` is now exposed on Windows. -* LLM-related improvements have been implemented in terms of both memory usage and performance. -* AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. - -* NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may - be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B - (exceeding 4B parameters) with prompt sizes over 1024 tokens. - - -OpenVINO Python API ------------------------------------------------------------------------------------------------ +* LLM performance and inference time has been improved with memory optimizations. -* Constant now can be created from openvino.Tensor. 
-* The “release_memory” method has been added for a compiled model, improving control over - memory consumption. -OpenVINO Node.js API ------------------------------------------------------------------------------------------------ -* Querying the best device to perform inference of a model with specific operations - is now available in JavaScript API. -* Contribution guidelines have been improved to make it easier for developers to contribute. -* Testing scope has been extended by inference in end-to-end tests. -* JavaScript API samples have been improved for readability and ease of running. +OpenVINO.GenAI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +* The encrypted_model_causal_lm sample is now available, showing how to decrypt a model. -TensorFlow Framework Support ------------------------------------------------------------------------------------------------ -* TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python - versions have been added to validation. -* Out-of-the-box conversion with static ranks has been improved by devising a new shape for - Switch-Merge condition sub-graphs. -* Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, - ScatterNd, Sub. -* The following issues have been fixed: - * the corner case with one element in LinSpace to avoid division by zero, - * support FP16 and FP64 input types for LeakyRelu, - * support non-i32/i64 output index type for ArgMin/Max operations. +Other Changes and Known Issues ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +Jupyter Notebooks +----------------------------- +* `Visual-language assistant with GLM-Edge-V and OpenVINO `__ +* `Local AI and OpenVINO `__ +* `Multimodal understanding and generation with Janus and OpenVINO `__ -PyTorch Framework Support ------------------------------------------------------------------------------------------------ -* PyTorch version 2.5 is now supported. -* OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. -* The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, - helping to support the HF Stable Diffusion model in ExportedProgram format. -ONNX Framework Support ------------------------------------------------------------------------------------------------ -* ONNX version 1.17.0 is now used. -* Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and - com.microsoft.QuickGelu operations are now supported. -JAX/Flax Framework Support ------------------------------------------------------------------------------------------------ -* JAX 0.4.35 and Flax 0.10.0 has been added to validation. -* jax._src.core.ClosedJaxpr object conversion is now supported. -* Vision Transformer from google-research/vision_transformer is now supported - (with support for 37 new operations). -OpenVINO Model Server -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building - block for AI applications like RAG. - `(read more) `__ -* The rerank endpoint has been added based on Cohere API, enabling easy similarity detection - between a query and a set of documents. It is one of the building blocks for AI applications - like RAG and makes integration with frameworks such as langchain easy. 
- `(read more) `__ -* The following improvements have been done to LLM text generation: - - * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint - is now supported. - * Performance has been increased on both CPU and GPU. - * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. - * Testing coverage and stability has been improved. - * The procedure for service deployment and model repository preparation has been simplified. - -* An experimental version of a Windows binary package - native model server for Windows OS - is - available. This release includes a set of limitations and has limited tests coverage. It is - intended for testing, while the production-ready release is expected with 2025.0. All feedback - is welcome. - - -Neural Network Compression Framework -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset - for LLM compression. This approach helps to compress LLMs more accurately in situations when - the dataset is not available or not sufficient. - `See our example `__ - for more information about the usage. -* Support of data-free and data-aware weight compression methods - nncf.compress_weights() - - has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and - faster on NPU. -* Caching of computed statistics in nncf.compress_weights() is now available, significantly - reducing compression time when performing compression of the same LLM multiple times, with - different compression parameters. To enable it, set the advanced ``statistics_path`` parameter - of nncf.compress_weights() to the desired file path location. -* The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for - specifying the data type for embeddings, convolutions, and last linear layers during 4-bit - weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains - the original floating-point precision of the model weights). In certain situations, - non-default value might give better accuracy of compressed LLMs. -* Preview support is now available for optimizing models in Torch - `FX format `__, nncf.quantize(), and - nncf.compress_weights() methods. After optimization such models can be directly executed - via torch.compile(compressed_model, backend="openvino"). For more details, see - `INT8 quantization example `__. -* Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has - been reduced significantly, with some variation depending on the model and method. -* Support for the following has changed: - - * NumPy 2 added - * PyTorch upgraded to 2.5.1 - * ONNX upgraded to 1.17 - * Python 3.8 discontinued - - - -OpenVINO Tokenizers +Previous 2024 releases +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* Several operations have been introduced and optimized. -* Conversion parameters and environment info have been added to ``rt_info``, improving - reproducibility and debugging. +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +.. 
dropdown:: 2024.5 - 20 November 2024 + :animate: fade-in-slide-down + :color: secondary -OpenVINO.GenAI -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + **What's new** -* The following has been added: + * More GenAI coverage and framework integrations to minimize code changes. - * LoRA adapter for the LLMPipeline. - * Text2ImagePipeline with LoRA adapter and text2image samples. - * VLMPipeline and visual_language_chat sample for text generation models with text and image - inputs. - * WhisperPipeline and whisper_speech_recognition sample. + * New models supported: Llama 3.2 (1B & 3B), Gemma 2 (2B & 9B), and YOLO11. + * LLM support on NPU: Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, Qwen2-7B-Instruct and Phi-3 + Mini-Instruct. + * Noteworthy notebooks added: Sam2, Llama3.2, Llama3.2 - Vision, Wav2Lip, Whisper, and Llava. + * Preview: support for Flax, a high-performance Python neural network library based on JAX. + Its modular design allows for easy customization and accelerated inference on GPUs. -* speculative_decoding_lm has been moved to LLMPipeline based implementation and is now - installed as part of the package. -* On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, - whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, - Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for - best performance. + * Broader Large Language Model (LLM) support and more model compression techniques. + * Optimizations for built-in GPUs on Intel® Core™ Ultra Processors (Series 1) and Intel® Arc™ + Graphics include KV Cache compression for memory reduction along with improved usability, + and model load time optimizations to improve first token latency for LLMs. + * Dynamic quantization was enabled to improve first token latency for LLMs on built-in + Intel® GPUs without impacting accuracy on Intel® Core™ Ultra Processors (Series 1). Second + token latency will also improve for large batch inference. + * A new method to generate synthetic text data is implemented in the Neural Network + Compression Framework (NNCF). This will allow LLMs to be compressed more accurately using + data-aware methods without datasets. Coming soon: This feature will soon be accessible via + Optimum Intel on Hugging Face. + * More portability and performance to run AI at the edge, in the cloud, or locally. + * Support for + `Intel® Xeon® 6 Processors with P-cores `__ + (formerly codenamed Granite Rapids) and + `Intel® Core™ Ultra 200V series processors `__ + (formerly codenamed Arrow Lake-S). + * Preview: GenAI API enables multimodal AI deployment with support for multimodal pipelines + for improved contextual awareness, transcription pipelines for easy audio-to-text + conversions, and image generation pipelines for streamlined text-to-visual conversions. + * Speculative decoding feature added to the GenAI API for improved performance and efficient + text generation using a small draft model that is periodically corrected by the full-size + model. + * Preview: LoRA adapters are now supported in the GenAI API for developers to quickly and + efficiently customize image and text generation models for specialized tasks. 
+ * The GenAI API now also supports LLMs on NPU allowing developers to specify NPU as the + target device, specifically for WhisperPipeline (for whisper-base, whisper-medium, and + whisper-small) and LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. + *Now deprecated* -Other Changes and Known Issues -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + * Python 3.8 is no longer supported: -Jupyter Notebooks ------------------------------ -* `Text-to-Image generation using OpenVINO GenAI `__ -* `Multi LoRA Image Generation `__ -* `Virtual Try-on using OpenVINO and CatVTON `__ -* `Visual Language Assistant using OpenVINO GenAI `__ -* `Speech recognition using OpenVINO GenAI `__ -* `YoloV11 `__ -* `Llama-3.2-vision `__ -* `Pixtral `__ -* `Segment Anything 2 `__ -* `Video Lips-sync using Wav2Lip `__ -* `Convert JAX to OpenVINO tutorial `__ + **OpenVINO™ Runtime** + *Common* -Known Issues ------------------------------ + * Numpy 2.x has been adopted for all currently supported components, including NNCF. + * A new constant constructor has been added, enabling constants to be created from data pointer + as shared memory. Additionally, it can take ownership of a shared, or other, object, avoiding + a two-step process to wrap memory into ``ov::Tensor``. + * Asynchronous file reading with mmap library has been implemented, reducing loading times for + model files, especially for LLMs. + * CPU implementation of SliceScatter operator is now available, used for models such as Gemma, + supporting increased LLM performance. -| **Component: CPU Plugin** -| ID: 155898 -| Description: -| Description: When using new version of Transformer version to convert some of LLMs - (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th - generation of Intel® Xeon® processors, due to model structure update triggering inference - precision difference in part of the model. The workaround is to use transformer version of - 4.44.2 or lower. -| **Component: GPU Plugin** -| ID: 154583 -| Description: -| LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When - facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACOTR - with a value of 8.0 in the compile_model() function. From the next release, scaling factor - value will be automatically applied through updated IR. + *CPU Device Plugin* -| **Component: GenAI** -| ID: 156437, 148933 -| Description: -| When using Python GenAI APIs, if ONNX 17.0 and later is installed, it may encounter the - error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) - initialization routine failed.” It is due to the ONNX dependency issue - `onnx/onnx#6267 `__, - Install - `Microsoft Visual C++ Redistributable `__ - latest supported downloads to fix the issue. + * Gold support of the Intel® Xeon® 6 platform with P-cores (formerly code name Granite Rapids) + has been reached. + * Support of Intel® Core™ Ultra 200V series processors (formerly codenamed Arrow Lake-S) has + been implemented. + * LLM performance has been further improved with Rotary Position Embedding optimization; Query, + Key, and Value; and multi-layer perceptron fusion optimization. + * FP16 support has been extended with SDPA and PagedAttention, improving performance of LLM via + both native APIs and the vLLM integration. 
+ * Models with LoRA adapters are now supported. -| **Component: GenAI** -| ID: 156944 -| Description: -| There were backward incompatible changes resulting in different text generated by LLMs like - Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a - tokenizer converted by older openvino_tolenizers. A way to resolve the issue is to convert - tokenizer and detokenizer models using the latest openvino_tokenizers. + *GPU Device Plugin* + * The KV cache INT8 compression mechanism is now available for all supported GPUs. It enables a + significant reduction in memory consumption, increasing performance with a minimal impact to + accuracy (it affects systolic devices slightly more than non-systolic ones). The feature is + activated by default for non-systolic devices. + * LoRA adapters are now functionally supported on GPU. + * A new feature of GPU weightless blob caching enables caching model structure only and reusing + the weights from the original model file. Use the new OPTIMIZE_SIZE property to activate. + * Dynamic quantization with INT4 and INT8 precisions has been implemented and enabled by + default on Intel® Core™ Ultra platforms, improving LLM first token latency. + *NPU Device Plugin* + + * Models retrieved from the OpenVINO cache have a smaller memory footprint now. The plugin + releases the cached model (blob) after weights are loaded in NPU regions. Model export is not + available in this scenario. Memory consumption is reduced during inference execution with one + blob size. This optimization requires the latest NPU driver: 32.0.100.3104. + * A driver bug for ``ov::intel_npu::device_total_mem_size`` has been fixed. The plugin will now + report 2GB as the maximum allocatable memory for any driver that does not support graph + extension 1.8. Even if older drivers report a larger amount of memory to be available, memory + allocation would fail when 2GB are exceeded. Plugin reports the number that driver exposes + for any driver that supports graph extension 1.8 (or newer). + * A new API is used to initialize the model (available in graph extension 1.8). + * Inference request set_tensors is now supported. + * ``ov::device::LUID`` is now exposed on Windows. + * LLM-related improvements have been implemented in terms of both memory usage and performance. + * AvgPool and MaxPool operator support has been extended, adding support for more PyTorch models. + + * NOTE: for systems based on Intel® Core™ Ultra Processors Series 2, more than 16GB of RAM may + be required to use larger models, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B + (exceeding 4B parameters) with prompt sizes over 1024 tokens. + + + *OpenVINO Python API* + + * Constant now can be created from openvino.Tensor. + * The “release_memory” method has been added for a compiled model, improving control over + memory consumption. + + + + *OpenVINO Node.js API* + + * Querying the best device to perform inference of a model with specific operations + is now available in JavaScript API. + * Contribution guidelines have been improved to make it easier for developers to contribute. + * Testing scope has been extended by inference in end-to-end tests. + * JavaScript API samples have been improved for readability and ease of running. + + + + *TensorFlow Framework Support* + + * TensorFlow 2.18.0, Keras 3.6.0, NumPy 2.0.2 in Python 3.12, and NumPy 1.26.4 in other Python + versions have been added to validation. 
+ * Out-of-the-box conversion with static ranks has been improved by devising a new shape for + Switch-Merge condition sub-graphs. + * Complex type for the following operations is now supported: ExpandDims, Pack, Prod, Rsqrt, + ScatterNd, Sub. + * The following issues have been fixed: + + * the corner case with one element in LinSpace to avoid division by zero, + * support FP16 and FP64 input types for LeakyRelu, + * support non-i32/i64 output index type for ArgMin/Max operations. + + + + *PyTorch Framework Support* + + * PyTorch version 2.5 is now supported. + * OpenVINO Model Converter (OVC) now supports TorchScript and ExportedProgram saved on a drive. + * The issue of aten.index.Tensor conversion for indices with “None” values has been fixed, + helping to support the HF Stable Diffusion model in ExportedProgram format. + + + + *ONNX Framework Support* + + * ONNX version 1.17.0 is now used. + * Customers' models with DequantizeLinear-21, com.microsoft.MatMulNBits, and + com.microsoft.QuickGelu operations are now supported. + + *JAX/Flax Framework Support* + + * JAX 0.4.35 and Flax 0.10.0 has been added to validation. + * jax._src.core.ClosedJaxpr object conversion is now supported. + * Vision Transformer from google-research/vision_transformer is now supported + (with support for 37 new operations). + + + **OpenVINO Model Server** + + * The OpenAI API text embedding endpoint has been added, enabling OVMS to be used as a building + block for AI applications like RAG. + `(read more) `__ + * The rerank endpoint has been added based on Cohere API, enabling easy similarity detection + between a query and a set of documents. It is one of the building blocks for AI applications + like RAG and makes integration with frameworks such as langchain easy. + `(read more) `__ + * The following improvements have been done to LLM text generation: + + * The ``echo`` sampling parameter together with ``logprobs`` in the ``completions`` endpoint + is now supported. + * Performance has been increased on both CPU and GPU. + * Throughput in high-concurrency scenarios has been increased with dynamic_split_fuse for GPU. + * Testing coverage and stability has been improved. + * The procedure for service deployment and model repository preparation has been simplified. + + * An experimental version of a Windows binary package - native model server for Windows OS - is + available. This release includes a set of limitations and has limited tests coverage. It is + intended for testing, while the production-ready release is expected with 2025.0. All feedback + is welcome. + + + **Neural Network Compression Framework** + + * A new nncf.data.generate_text_data() method has been added for generating a synthetic dataset + for LLM compression. This approach helps to compress LLMs more accurately in situations when + the dataset is not available or not sufficient. + `See our example `__ + for more information about the usage. + * Support of data-free and data-aware weight compression methods - nncf.compress_weights() - + has been extended with NF4 per-channel quantization, making compressed LLMs more accurate and + faster on NPU. + * Caching of computed statistics in nncf.compress_weights() is now available, significantly + reducing compression time when performing compression of the same LLM multiple times, with + different compression parameters. To enable it, set the advanced ``statistics_path`` parameter + of nncf.compress_weights() to the desired file path location. 
+ * The ``backup_mode`` optional parameter has been added to nncf.compress_weights(), for + specifying the data type for embeddings, convolutions, and last linear layers during 4-bit + weight compression. Available options are INT8_ASYM (default), INT8_SYM, and NONE (retains + the original floating-point precision of the model weights). In certain situations, + non-default value might give better accuracy of compressed LLMs. + * Preview support is now available for optimizing models in Torch + `FX format `__, nncf.quantize(), and + nncf.compress_weights() methods. After optimization such models can be directly executed + via torch.compile(compressed_model, backend="openvino"). For more details, see + `INT8 quantization example `__. + * Memory consumption of data-aware weight compression methods - nncf.compress_weights() – has + been reduced significantly, with some variation depending on the model and method. + * Support for the following has changed: + + * NumPy 2 added + * PyTorch upgraded to 2.5.1 + * ONNX upgraded to 1.17 + * Python 3.8 discontinued + + + + **OpenVINO Tokenizers** + + * Several operations have been introduced and optimized. + * Conversion parameters and environment info have been added to ``rt_info``, improving + reproducibility and debugging. + + + + **OpenVINO.GenAI** + + * The following has been added: + + * LoRA adapter for the LLMPipeline. + * Text2ImagePipeline with LoRA adapter and text2image samples. + * VLMPipeline and visual_language_chat sample for text generation models with text and image + inputs. + * WhisperPipeline and whisper_speech_recognition sample. + + * speculative_decoding_lm has been moved to LLMPipeline based implementation and is now + installed as part of the package. + * On NPU, a set of pipelines has been enabled: WhisperPipeline (for whisper-base, + whisper-medium, and whisper-small), LLMPipeline (for Llama 3 8B, Llama 2 7B, Mistral-v0.2-7B, + Qwen2-7B-Instruct, and Phi-3 Mini-instruct). Use driver version 32.0.100.3104 or later for + best performance. + + + + + + **Other Changes and Known Issues** + + *Jupyter Notebooks* + + * `Text-to-Image generation using OpenVINO GenAI `__ + * `Multi LoRA Image Generation `__ + * `Virtual Try-on using OpenVINO and CatVTON `__ + * `Visual Language Assistant using OpenVINO GenAI `__ + * `Speech recognition using OpenVINO GenAI `__ + * `YoloV11 `__ + * `Llama-3.2-vision `__ + * `Pixtral `__ + * `Segment Anything 2 `__ + * `Video Lips-sync using Wav2Lip `__ + * `Convert JAX to OpenVINO tutorial `__ + + + *Known Issues* + + | **Component: CPU Plugin** + | ID: 155898 + | Description: + | Description: When using new version of Transformer version to convert some of LLMs + (GPT-J/GPT-NeoX or falcon-7b), the inference accuracy may be impacted on 4th or 5th + generation of Intel® Xeon® processors, due to model structure update triggering inference + precision difference in part of the model. The workaround is to use transformer version of + 4.44.2 or lower. + + | **Component: GPU Plugin** + | ID: 154583 + | Description: + | LLM accuracy can be low especially on non-systolic platforms like Intel® Core™ Ultra. When + facing the low accuracy issue, user needs to manually set a config ACTIVATION_SCALING_FACOTR + with a value of 8.0 in the compile_model() function. From the next release, scaling factor + value will be automatically applied through updated IR. 
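
A minimal sketch of this workaround, assuming the intended property name is
``ACTIVATION_SCALING_FACTOR`` and that it can be passed as a plain config entry to
``compile_model()``:

.. code-block:: python

   import openvino as ov

   core = ov.Core()
   model = core.read_model("model.xml")  # hypothetical IR path
   # Known-issue workaround: force the activation scaling factor for the GPU plugin.
   compiled_model = core.compile_model(model, "GPU", {"ACTIVATION_SCALING_FACTOR": "8.0"})
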
+ + | **Component: GenAI** + | ID: 156437, 148933 + | Description: + | When using Python GenAI APIs, if ONNX 17.0 and later is installed, it may encounter the + error “DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) + initialization routine failed.” It is due to the ONNX dependency issue + `onnx/onnx#6267 `__, + Install + `Microsoft Visual C++ Redistributable `__ + latest supported downloads to fix the issue. + + | **Component: GenAI** + | ID: 156944 + | Description: + | There were backward incompatible changes resulting in different text generated by LLMs like + Mistralai/Mistral-7B-Instruct-v0.2 and TinyLlama/TinyLlama-1.1B-Chat-v1.0 when using a + tokenizer converted by older openvino_tolenizers. A way to resolve the issue is to convert + tokenizer and detokenizer models using the latest openvino_tokenizers. -Previous 2024 releases -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -.. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -472,20 +520,20 @@ Previous 2024 releases *NPU Device Plugin* -* `Remote Tensor API `__ - is now supported. -* You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a - specific number of tiles to be used by the model, per inference request - (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles - selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile - number other than 1 may be a problem for cross platform compatibility, if not tested - explicitly versus the max_tiles value. -* You can now bypass the model caching mechanism in the driver - (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. -* Memory footprint at model execution has been reduced by one blob (compiled model) size. - For execution, the plugin no longer retrieves the compiled model from the driver, it uses the - level zero graph handle directly, instead. The compiled model is now retrieved from the driver - only during the export method. + * `Remote Tensor API `__ + is now supported. + * You can now query the available number of tiles (ov::intel_npu::max_tiles) and force a + specific number of tiles to be used by the model, per inference request + (ov::intel_npu::tiles). **Note:** ov::intel_npu::tiles overrides the default number of tiles + selected by the compiler based on performance hints (ov::hint::performance_mode). Any tile + number other than 1 may be a problem for cross platform compatibility, if not tested + explicitly versus the max_tiles value. + * You can now bypass the model caching mechanism in the driver + (ov::intel_npu::bypass_umd_caching). Read more about driver and OpenVINO caching. + * Memory footprint at model execution has been reduced by one blob (compiled model) size. + For execution, the plugin no longer retrieves the compiled model from the driver, it uses the + level zero graph handle directly, instead. The compiled model is now retrieved from the driver + only during the export method. *OpenVINO Python API* @@ -1811,6 +1859,4 @@ Copyright © 2024, Intel Corporation. All rights reserved. For more complete information about compiler optimizations, see our Optimization Notice. -Performance varies by use, configuration and other factors. - - +Performance varies by use, configuration and other factors. 
\ No newline at end of file diff --git a/docs/articles_en/documentation/openvino-extensibility.rst b/docs/articles_en/documentation/openvino-extensibility.rst index 216135009b1806..d166f1390d643d 100644 --- a/docs/articles_en/documentation/openvino-extensibility.rst +++ b/docs/articles_en/documentation/openvino-extensibility.rst @@ -45,7 +45,7 @@ The first part is required for inference. The second part is required for succes Definition of Operation Semantics ################################# -If the custom operation can be mathematically represented as a combination of exiting OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. You can use any valid combination of exiting operations. The next section of this document describes the way to map a custom operation. +If the custom operation can be mathematically represented as a combination of existing OpenVINO operations and such decomposition gives desired performance, then low-level operation implementation is not required. Refer to the latest OpenVINO operation set, when deciding feasibility of such decomposition. You can use any valid combination of existing operations. The next section of this document describes the way to map a custom operation. If such decomposition is not possible or appears too bulky with a large number of constituent operations that do not perform well, then a new class for the custom operation should be implemented, as described in the :doc:`Custom Operation Guide `. diff --git a/docs/articles_en/get-started/configurations/genai-dependencies.rst b/docs/articles_en/get-started/configurations/genai-dependencies.rst index 59d29ef3108da0..4486890c3a40b8 100644 --- a/docs/articles_en/get-started/configurations/genai-dependencies.rst +++ b/docs/articles_en/get-started/configurations/genai-dependencies.rst @@ -4,12 +4,12 @@ OpenVINO™ GenAI Dependencies OpenVINO™ GenAI depends on both `OpenVINO `__ and `OpenVINO Tokenizers `__. During OpenVINO™ GenAI installation from PyPi, the same versions of OpenVINO and OpenVINO Tokenizers -are used (e.g. ``openvino==2024.5.0`` and ``openvino-tokenizers==2024.5.0.0`` are installed for -``openvino-genai==2024.5.0``). +are used (e.g. ``openvino==2024.6.0`` and ``openvino-tokenizers==2024.6.0.0`` are installed for +``openvino-genai==2024.6.0``). -Trying to update any of the dependency packages might result in a version incompatiblibty +Trying to update any of the dependency packages might result in a version incompatibility due to different Application Binary Interfaces (ABIs), which will result in errors while running -OpenVINO GenAI. Having package version in the ``...`` format, allows +OpenVINO GenAI. Having package version in the ``...`` format, enables changing the ```` portion of the full version to ensure ABI compatibility. Changing ````, ```` or ```` part of the version may break ABI. diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 48ea0a434c5388..68656e554145a4 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -1,4 +1,4 @@ -Install OpenVINO™ 2024.5 +Install OpenVINO™ 2024.6 ========================== @@ -23,10 +23,11 @@ Install OpenVINO™ 2024.5 -OpenVINO 2024.5, described here, is not a Long-Term-Support version! 
+OpenVINO 2024.6, described here, is a Long-Term-Support version! All currently supported versions are: -* 2024.5 (development) +* 2025.0 (in development) +* 2024.6 (LTS) * 2023.3 (LTS) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide.rst index 5846d1a484737c..e1d643648b4be5 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide.rst @@ -20,12 +20,12 @@ Generative AI workflow Generative AI is a specific area of Deep Learning models used for producing new and “original” data, based on input in the form of image, sound, or natural language text. Due to their complexity and size, generative AI pipelines are more difficult to deploy and run efficiently. -OpenVINO simplifies the process and ensures high-performance integrations, with the following +OpenVINO™ simplifies the process and ensures high-performance integrations, with the following options: .. tab-set:: - .. tab-item:: OpenVINO GenAI + .. tab-item:: OpenVINO™ GenAI | - Suggested for production deployment for the supported use cases. | - Smaller footprint and fewer dependencies. @@ -39,6 +39,8 @@ options: text generation loop, tokenization, and scheduling, offering ease of use and high performance. + `Check out the OpenVINO GenAI Quick-start Guide [PDF] `__ + .. tab-item:: Hugging Face integration | - Suggested for prototyping and, if the use case is not covered by OpenVINO GenAI, production. @@ -54,49 +56,34 @@ options: as well as conversion on the fly. For integration with the final product it may offer lower performance, though. -`Check out the GenAI Quick-start Guide [PDF] `__ - -The advantages of using OpenVINO for LLM deployment: - -.. dropdown:: Fewer dependencies and smaller footprint - :animate: fade-in-slide-down - :color: secondary - - Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced - memory footprint, makes deployments easier and updates more manageable. - -.. dropdown:: Compression and precision management - :animate: fade-in-slide-down - :color: secondary - Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage - format reduction. This includes fp16 precision for non-compressed models and int8/int4 for - compressed models, like GPTQ models from `Hugging Face `__. -.. dropdown:: Enhanced inference capabilities - :animate: fade-in-slide-down - :color: secondary +The advantages of using OpenVINO for generative model deployment: - Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and - encapsulation, dynamic beam size configuration, and speculative sampling, and more are - available. +| **Fewer dependencies and smaller footprint** +| Less bloated than frameworks such as Hugging Face and PyTorch, with a smaller binary size and reduced + memory footprint, makes deployments easier and updates more manageable. -.. dropdown:: Stateful model optimization - :animate: fade-in-slide-down - :color: secondary +| **Compression and precision management** +| Techniques such as 8-bit and 4-bit weight compression, including embedding layers, and storage + format reduction. This includes fp16 precision for non-compressed models and int8/int4 for + compressed models, like GPTQ models from `Hugging Face `__. 
- Models from the Hugging Face Transformers are converted into a stateful form, optimizing - inference performance and memory usage in long-running text generation tasks by managing past - KV-cache tensors more efficiently internally. This feature is automatically activated for - many supported models, while unsupported ones remain stateless. Learn more about the - :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. +| **Enhanced inference capabilities** +| Advanced features like in-place KV-cache, dynamic quantization, KV-cache quantization and + encapsulation, dynamic beam size configuration, and speculative sampling, and more are + available. -.. dropdown:: Optimized LLM inference - :animate: fade-in-slide-down - :color: secondary +| **Stateful model optimization** +| Models from the Hugging Face Transformers are converted into a stateful form, optimizing + inference performance and memory usage in long-running text generation tasks by managing past + KV-cache tensors more efficiently internally. This feature is automatically activated for + many supported models, while unsupported ones remain stateless. Learn more about the + :doc:`Stateful models and State API <../openvino-workflow/running-inference/stateful-models>`. - Includes a Python API for rapid development and C++ for further optimization, offering - better performance than Python-based runtimes. +| **Optimized LLM inference** +| Includes a Python API for rapid development and C++ for further optimization, offering + better performance than Python-based runtimes. Proceed to guides on: diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 172586831252a9..eff30eed054295 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -28,6 +28,10 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi .. dropdown:: Text-to-Image Generation + OpenVINO GenAI introduces the openvino_genai.Text2ImagePipeline for inference of text-to-image + models such as: as Stable Diffusion 1.5, 2.1, XL, LCM, Flex, and more. + See the following usage example for reference. + .. tab-set:: .. tab-item:: Python @@ -579,8 +583,9 @@ compression is done by NNCF at the model export stage. The exported model contai information necessary for execution, including the tokenizer/detokenizer and the generation config, ensuring that its results match those generated by Hugging Face. -The `LLMPipeline` is the main object used for decoding and handles all the necessary steps. -You can construct it directly from the folder with the converted model. +The `LLMPipeline` is the main object to setup the model for text generation. You can provide the +converted model to this object, specify the device for inference, and provide additional +parameters. .. 
tab-set:: @@ -911,7 +916,7 @@ running the following code: GenAI API ####################################### -The use case described here uses the following OpenVINO GenAI API methods: +The use case described here uses the following OpenVINO GenAI API classes: * generation_config - defines a configuration class for text generation, enabling customization of the generation process such as the maximum length of @@ -921,7 +926,6 @@ The use case described here uses the following OpenVINO GenAI API methods: text generation, and managing outputs with configurable options. * streamer_base - an abstract base class for creating streamers. * tokenizer - the tokenizer class for text encoding and decoding. -* visibility - controls the visibility of the GenAI library. Learn more from the `GenAI API reference `__. diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst index 53b8d5440ca855..e6d15675ea45b8 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-model-preparation.rst @@ -7,8 +7,8 @@ Generative Model Preparation -Since generative AI models tend to be big and resource-heavy, it is advisable to store them -locally and optimize for efficient inference. This article will show how to prepare +Since generative AI models tend to be big and resource-heavy, it is advisable to +optimize them for efficient inference. This article will show how to prepare LLM models for inference with OpenVINO by: * `Downloading Models from Hugging Face <#download-generative-models-from-hugging-face-hub>`__ diff --git a/docs/dev/ov_dependencies.txt b/docs/dev/ov_dependencies.txt index d9c344d2c3048d..cb64e4d5a6534c 100644 --- a/docs/dev/ov_dependencies.txt +++ b/docs/dev/ov_dependencies.txt @@ -1,6 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -#This file provides a comprehensive list of all dependencies of OpenVINO 2024.5 +#This file provides a comprehensive list of all dependencies of OpenVINO 2024.6 #The file is part of the automation pipeline for posting OpenVINO IR models on the HuggingFace Hub, including OneBOM dependency checks. diff --git a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf index 90ad7bd6b000b4..13edfc8f0b7bc2 100644 Binary files a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf and b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf differ diff --git a/docs/sphinx_setup/index.rst b/docs/sphinx_setup/index.rst index ad98be58cde1cd..1e5233ac064d0f 100644 --- a/docs/sphinx_setup/index.rst +++ b/docs/sphinx_setup/index.rst @@ -25,16 +25,16 @@ hardware and environments, on-premises and on-device, in the browser or in the c
+     New GenAI API
+     Generative AI in only a few lines of code!
+     Check out our guide
      OpenVINO models on Hugging Face!
      Get pre-optimized OpenVINO models, no need to convert!
      Visit Hugging Face
-     New Generative AI API
-     Generate text with LLMs in only a few lines of code!
-     Check out our guide
      Improved model serving
      OpenVINO Model Server has improved parallel inferencing!
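
To illustrate the "few lines of code" claim in the banner above, here is a minimal, hedged
sketch; it assumes the ``openvino-genai`` package is installed and that a chat model has
already been exported to OpenVINO format in ``./model_dir``:

.. code-block:: python

   import openvino_genai

   # Hypothetical model directory: any LLM already exported to OpenVINO format works here.
   pipe = openvino_genai.LLMPipeline("./model_dir", "CPU")
   print(pipe.generate("What is OpenVINO?", max_new_tokens=100))
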

    diff --git a/src/bindings/js/node/package.json b/src/bindings/js/node/package.json index 10fc6d38bd51f4..c0e4e03ddc4df6 100644 --- a/src/bindings/js/node/package.json +++ b/src/bindings/js/node/package.json @@ -51,6 +51,17 @@ "host": "https://storage.openvinotoolkit.org" }, "keywords": [ - "OpenVINO" + "OpenVINO", + "openvino", + "openvino-node", + "openvino npm", + "openvino binding", + "openvino node.js", + "openvino library", + "intel openvino", + "openvino toolkit", + "openvino API", + "openvino SDK", + "openvino integration" ] } diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index cc1d4514b7bbfe..4d65603a5323ab 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -1,5 +1,5 @@ # used in multiple components -numpy>=1.16.6,<2.2.0 # Python bindings, frontends +numpy>=1.16.6,<2.3.0 # Python bindings, frontends # pytest pytest>=5.0,<8.4 diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index a2d63161fe764c..febb91d5ecee55 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ -numpy>=1.16.6,<2.2.0 +numpy>=1.16.6,<2.3.0 openvino-telemetry>=2023.2.1 packaging diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py index e4d1a247520332..69c678909b1c9e 100644 --- a/src/bindings/python/src/openvino/__init__.py +++ b/src/bindings/python/src/openvino/__init__.py @@ -27,11 +27,11 @@ from openvino import properties as properties # Import most important classes and functions from openvino.runtime -from openvino.runtime import Model -from openvino.runtime import Core -from openvino.runtime import CompiledModel -from openvino.runtime import InferRequest -from openvino.runtime import AsyncInferQueue +from openvino._ov_api import Model +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import AsyncInferQueue from openvino.runtime import Symbol from openvino.runtime import Dimension @@ -43,12 +43,13 @@ from openvino.runtime import Tensor from openvino.runtime import OVAny -from openvino.runtime import compile_model +# Helper functions for openvino module +from openvino.runtime.utils.data_helpers import tensor_from_file +from openvino._ov_api import compile_model from openvino.runtime import get_batch from openvino.runtime import set_batch from openvino.runtime import serialize from openvino.runtime import shutdown -from openvino.runtime import tensor_from_file from openvino.runtime import save_model from openvino.runtime import layout_helpers diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/_ov_api.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/ie_api.py rename to src/bindings/python/src/openvino/_ov_api.py diff --git a/src/bindings/python/src/openvino/runtime/exceptions.py b/src/bindings/python/src/openvino/exceptions.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/exceptions.py rename to src/bindings/python/src/openvino/exceptions.py diff --git a/src/bindings/python/src/openvino/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py index 05b97390baa780..6995d55a28a776 100644 --- a/src/bindings/python/src/openvino/opset8/ops.py +++ b/src/bindings/python/src/openvino/opset8/ops.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple import numpy as np -from 
openvino.runtime.exceptions import UserInputError +from openvino.exceptions import UserInputError from openvino.op import Constant, Parameter, if_op from openvino.runtime import Node from openvino.runtime.opset_utils import _get_node_factory diff --git a/src/bindings/python/src/openvino/runtime/exceptions/__init__.py b/src/bindings/python/src/openvino/runtime/exceptions/__init__.py new file mode 100644 index 00000000000000..18524a21f7d468 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/exceptions/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.exceptions import OVError +from openvino.exceptions import UserInputError +from openvino.exceptions import OVTypeError diff --git a/src/bindings/python/src/openvino/runtime/ie_api/__init__.py b/src/bindings/python/src/openvino/runtime/ie_api/__init__.py new file mode 100644 index 00000000000000..a861224b67eded --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/ie_api/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino._ov_api import Core +from openvino._ov_api import CompiledModel +from openvino._ov_api import InferRequest +from openvino._ov_api import Model +from openvino._ov_api import AsyncInferQueue + +from openvino._ov_api import tensor_from_file +from openvino._ov_api import compile_model diff --git a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp index dee95c6a832d2c..a19f2b2f482337 100644 --- a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp +++ b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp @@ -191,7 +191,7 @@ static void regclass_graph_PreProcessSteps(py::module m) { :param pads_end: Number of elements matches the number of indices in data attribute. Specifies the number of padding elements at the ending of each axis. :type pads_end: 1D tensor of type T_INT. :param value: All new elements are populated with this value or with 0 if input not provided. Shouldn’t be set for other pad_mode values. - :type value: scalar tensor of type T. + :type value: scalar tensor of type T. :param mode: pad_mode specifies the method used to generate new element values. :type mode: string :return: Reference to itself, allows chaining of calls in client's code in a builder-like manner. @@ -219,7 +219,7 @@ static void regclass_graph_PreProcessSteps(py::module m) { :param pads_end: Number of elements matches the number of indices in data attribute. Specifies the number of padding elements at the ending of each axis. :type pads_end: 1D tensor of type T_INT. :param value: All new elements are populated with this value or with 0 if input not provided. Shouldn’t be set for other pad_mode values. - :type value: scalar tensor of type T. + :type value: scalar tensor of type T. :param mode: pad_mode specifies the method used to generate new element values. :type mode: string :return: Reference to itself, allows chaining of calls in client's code in a builder-like manner. 
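
The ``pad`` docstrings above describe the preprocessing step's parameters; a hedged sketch of
driving it from Python, assuming the four-argument ``pad(pads_begin, pads_end, value, mode)``
overload documented above, might look like this:

.. code-block:: python

   import openvino as ov
   from openvino.preprocess import PrePostProcessor

   core = ov.Core()
   model = core.read_model("model.xml")  # hypothetical NCHW image model

   ppp = PrePostProcessor(model)
   # Pad one element on each side of the spatial axes with zeros ("constant" mode),
   # mirroring the pads_begin / pads_end / value / mode parameters described above.
   ppp.input().preprocess().pad([0, 0, 1, 1], [0, 0, 1, 1], 0, "constant")
   model = ppp.build()
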
@@ -308,7 +308,8 @@ static void regclass_graph_InputTensorInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for input tensor info + Set layout for input tensor info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); @@ -422,7 +423,8 @@ static void regclass_graph_OutputTensorInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for output tensor info + Set layout for output tensor info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); @@ -475,7 +477,8 @@ static void regclass_graph_OutputModelInfo(py::module m) { }, py::arg("layout"), R"( - Set layout for output model info + Set layout for output model info + :param layout: layout to be set :type layout: Union[str, openvino.runtime.Layout] )"); diff --git a/src/bindings/python/tests/test_runtime/test_input_node.py b/src/bindings/python/tests/test_runtime/test_input_node.py index 5e083051934afb..c12eb085317afc 100644 --- a/src/bindings/python/tests/test_runtime/test_input_node.py +++ b/src/bindings/python/tests/test_runtime/test_input_node.py @@ -75,7 +75,8 @@ def test_input_get_source_output(device): net_input = compiled_model.output(0) input_node = net_input.get_node().inputs()[0] name = input_node.get_source_output().get_node().get_friendly_name() - assert name == "relu" + # Expected ReLu node name can be changed if conversion precision applied (new Convert node added) + assert name in ("relu", "relu.0") def test_input_get_tensor(device): diff --git a/src/bindings/python/tests/test_runtime/test_ovdict.py b/src/bindings/python/tests/test_runtime/test_ovdict.py index e7a5854d66d072..cf332bb0997dfb 100644 --- a/src/bindings/python/tests/test_runtime/test_ovdict.py +++ b/src/bindings/python/tests/test_runtime/test_ovdict.py @@ -9,7 +9,7 @@ import openvino.runtime.opset13 as ops from openvino import Core, CompiledModel, InferRequest, Model from openvino.runtime import ConstOutput -from openvino.runtime.ie_api import OVDict +from openvino.runtime.utils.data_helpers import OVDict def _get_ovdict( diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index aa067da4f360fd..d5e96ddafc252f 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -208,7 +208,8 @@ bool convert_function_precision(const std::shared_ptr& f, bool is_changed, bool is_subgraph, bool convert_input_output_precision, - bool store_original_precision_as_rt_attribute) { + bool store_original_precision_as_rt_attribute, + bool names_compatibility_mode) { bool is_output_precision_changed = false; ov::element::TypeVector orig_result_types; @@ -277,7 +278,8 @@ bool convert_function_precision(const std::shared_ptr& f, is_changed || is_output_precision_changed, true, true, - store_original_precision_as_rt_attribute) || + store_original_precision_as_rt_attribute, + names_compatibility_mode) || is_changed; } } @@ -325,18 +327,21 @@ bool convert_function_precision(const std::shared_ptr& f, if (result->get_input_element_type(0) != orig_result_types[i]) { auto result_input = result->input_value(0); const auto convert = std::make_shared(result_input, orig_result_types[i]); - if (result_input.get_node()->get_output_size() > 1) { - convert->set_friendly_name(result_input.get_node()->get_friendly_name() + "." 
+ - std::to_string(result_input.get_index())); + + auto convert_f_name = result_input.get_node()->get_friendly_name(); + if (names_compatibility_mode) { + if (result_input.get_node()->get_output_size() > 1) { + convert_f_name += '.' + std::to_string(result_input.get_index()); + } else { + result_input.get_node()->set_friendly_name(""); + } + + convert->get_output_tensor(0).set_names(result_input.get_names()); } else { - convert->set_friendly_name(result_input.get_node()->get_friendly_name()); - result_input.get_node()->set_friendly_name(""); + convert_f_name += '.' + std::to_string(result_input.get_index()); } + convert->set_friendly_name(convert_f_name); - auto& convert_output_tensor = convert->get_output_tensor(0); - convert_output_tensor.set_names(result_input.get_names()); - - result_input.set_names({}); result->input(0).replace_source_output(convert->output(0)); result->revalidate_and_infer_types(); } @@ -359,6 +364,8 @@ bool convert_precision(ov::pass::PassBase& pass, // changing precision we need to understand which Constant consumers belongs // to the current ov::Model std::unordered_map>> const_to_internal_output; + + const auto names_compatibility_mode = f->has_rt_info("version") && f->get_rt_info("version") < 11; return convert_function_precision(f, type_to_fuse, type_to_extend, @@ -369,7 +376,8 @@ bool convert_precision(ov::pass::PassBase& pass, false, false, convert_input_output_precision, - store_original_precision_as_rt_attribute); + store_original_precision_as_rt_attribute, + names_compatibility_mode); } using precisions_set_t = std::unordered_set; diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index c2b7133506aebe..f4bdedf4764604 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -2197,8 +2197,9 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsForParameterAndResult) auto param_1 = make_shared(element::f64, Shape{3}); auto converted_param = make_shared(param_1, element::f32); auto sin = make_shared(converted_param); + sin->get_output_tensor(0).add_names({"sine:0"}); auto converted_sin = make_shared(sin, element::f64); - converted_sin->get_output_tensor(0).add_names({"sine:0"}); + converted_sin->set_friendly_name("sine.0"); auto result_sin = make_shared(converted_sin); model_ref = make_shared(result_sin, ParameterVector{param_1}); } @@ -2208,7 +2209,7 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsForParameterAndResult) ASSERT_TRUE(result.valid) << result.message; const auto& results = model->get_results(); - ASSERT_EQ("sine", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("sine.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); } TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { @@ -2272,8 +2273,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { auto converted_mul = make_shared(mul, element::f64); auto sin = make_shared(convert_1); - converted_add->get_output_tensor(0).add_names({"add:0"}); - converted_mul->get_output_tensor(0).add_names({"mul:0"}); + add->get_output_tensor(0).add_names({"add:0"}); + mul->get_output_tensor(0).add_names({"mul:0"}); sin->get_output_tensor(0).add_names({"sine:0"}); auto result_add = make_shared(converted_add); @@ -2289,8 +2290,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiParam) { ASSERT_TRUE(result.valid) << result.message; const auto& 
results = model->get_results(); - ASSERT_EQ("add", results[0]->get_input_node_ptr(0)->get_friendly_name()); - ASSERT_EQ("mul", results[1]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("add.0", results[0]->get_input_node_ptr(0)->get_friendly_name()); + ASSERT_EQ("mul.0", results[1]->get_input_node_ptr(0)->get_friendly_name()); ASSERT_EQ("sine", results[2]->get_input_node_ptr(0)->get_friendly_name()); } @@ -2306,6 +2307,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp split->get_output_tensor(1).add_names({"split:1"}); split->get_output_tensor(2).add_names({"split:2"}); model = make_shared(split->outputs(), ParameterVector{param_1}); + // set version 10 to use names compatibility mode + model->get_rt_info()["version"] = static_cast(10); type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = false; @@ -2322,6 +2325,9 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsSingleNodeMultipleOutp auto convert_1 = make_shared(param_1, element::f32); auto axis = opset10::Constant::create(element::i32, Shape{}, {0}); auto split = make_shared(convert_1, axis, 3); + split->get_output_tensor(0).add_names({"split:0"}); + split->get_output_tensor(1).add_names({"split:1"}); + split->get_output_tensor(2).add_names({"split:2"}); auto convert_split_0 = make_shared(split->output(0), element::f64); auto convert_split_1 = make_shared(split->output(1), element::f64); @@ -2390,6 +2396,8 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { result.get_node()->set_friendly_name("if_result"); result.add_names({"if_result:0"}); model = make_shared(OutputVector{result}, ParameterVector{cond, param_1, param_2}); + // set version 10 to use names compatibility mode + model->get_rt_info()["version"] = static_cast(10); type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = false; @@ -2443,6 +2451,7 @@ TEST(TransformationTests, ConvertPrecisionExplicitConvertsMultiSubgraphs) { if_op->set_input(convert_1, param_1_then, param_1_else); if_op->set_input(convert_2, param_2_then, param_2_else); auto result = if_op->set_output(result_then, result_else); + result.add_names({"if_result:0"}); auto converted_result = make_shared(result, element::f64); converted_result->get_output_tensor(0).add_names({"if_result:0"}); diff --git a/src/core/src/preprocess/pre_post_process.cpp b/src/core/src/preprocess/pre_post_process.cpp index d81d48082cde04..b408755a7d85a8 100644 --- a/src/core/src/preprocess/pre_post_process.cpp +++ b/src/core/src/preprocess/pre_post_process.cpp @@ -56,6 +56,10 @@ struct PrePostProcessor::PrePostProcessorImpl { PrePostProcessorImpl() = default; explicit PrePostProcessorImpl(const std::shared_ptr& f) : m_function(f) { OPENVINO_ASSERT(f, "Model can't be nullptr for PrePostProcessor"); + + // if IR version < 11, set compatibility mode + const auto names_mode = m_function->has_rt_info("version") && m_function->get_rt_info("version") < 11; + for (size_t i = 0; i < m_function->inputs().size(); ++i) { auto info = InputInfo(); info.m_impl->m_resolved_param = m_function->get_parameters()[i]; @@ -64,6 +68,7 @@ struct PrePostProcessor::PrePostProcessorImpl { for (size_t i = 0; i < m_function->outputs().size(); ++i) { auto info = OutputInfo(); info.m_impl->m_output_node = m_function->output(i); + info.m_impl->get_tensor_data()->set_names_compatibility_mode(names_mode); m_outputs.push_back(std::move(info)); } } diff --git a/src/core/src/preprocess/preprocess_impls.cpp 
b/src/core/src/preprocess/preprocess_impls.cpp index c2523beed66620..e0cdee2e76a140 100644 --- a/src/core/src/preprocess/preprocess_impls.cpp +++ b/src/core/src/preprocess/preprocess_impls.cpp @@ -370,30 +370,40 @@ void OutputInfo::OutputInfoImpl::build(ov::ResultVector& results) { } auto orig_parent = result->get_input_source_output(0).get_node_shared_ptr(); - // Move result tensor names from previous input to new - const auto result_input_names = result->get_input_tensor(0).get_names(); - result->get_input_tensor(0).set_names({}); - node.get_tensor().set_names(result_input_names); - - if (!post_processing_applied) { - return; - } - - if (orig_parent->get_output_size() == 1) { - node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name()); + if (get_tensor_data()->get_names_compatibility_mode()) { + // Move result tensor names from previous input to new + const auto result_input_names = result->get_input_tensor(0).get_names(); + result->get_input_tensor(0).set_names({}); + node.get_tensor().set_names(result_input_names); + + if (!post_processing_applied) { + return; + } - // Reset friendly name of input node to avoid names collision - // when there is at a new node inserted by post-processing steps - // If no new nodes are inserted by post-processing, then we need to preserve friendly name of input - // as it's required for old API correct work - result->get_input_source_output(0).get_node_shared_ptr()->set_friendly_name(""); + if (orig_parent->get_output_size() == 1) { + node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name()); + + // Reset friendly name of input node to avoid names collision + // when there is at a new node inserted by post-processing steps + // If no new nodes are inserted by post-processing, then we need to preserve friendly name of input + // as it's required for old API correct work + result->get_input_source_output(0).get_node_shared_ptr()->set_friendly_name(""); + } else if (node.get_node_shared_ptr() != orig_parent) { + // Result node is changed - add "." suffix + node.get_node_shared_ptr()->set_friendly_name( + orig_parent->get_friendly_name() + "." + + std::to_string(result->get_input_source_output(0).get_index())); + } + result->input(0).replace_source_output(node); + result->revalidate_and_infer_types(); } else if (node.get_node_shared_ptr() != orig_parent) { // Result node is changed - add "." suffix - node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name() + "." 
+ - std::to_string(result->get_input_source_output(0).get_index())); + const auto suffix = std::string(".") + std::to_string(result->get_input_source_output(0).get_index()); + node.get_node_shared_ptr()->set_friendly_name(orig_parent->get_friendly_name() + suffix); + + result->input(0).replace_source_output(node); + result->revalidate_and_infer_types(); } - result->input(0).replace_source_output(node); - result->revalidate_and_infer_types(); // Update layout if (!context.layout().empty()) { diff --git a/src/core/src/preprocess/preprocess_impls.hpp b/src/core/src/preprocess/preprocess_impls.hpp index 87d6b5456badc3..ee74c534c361fb 100644 --- a/src/core/src/preprocess/preprocess_impls.hpp +++ b/src/core/src/preprocess/preprocess_impls.hpp @@ -122,12 +122,21 @@ class TensorInfoImplBase { return m_layout; } + void set_names_compatibility_mode(const bool compatiblity_mode) { + m_names_compatiblity_mode = compatiblity_mode; + } + + const bool get_names_compatibility_mode() const { + return m_names_compatiblity_mode; + } + protected: element::Type m_type = element::dynamic; bool m_type_set = false; Layout m_layout = Layout(); bool m_layout_set = false; + bool m_names_compatiblity_mode = false; }; class OutputTensorInfo::OutputTensorInfoImpl : public TensorInfoImplBase {}; diff --git a/src/core/tests/frontend/frontend_manager.cpp b/src/core/tests/frontend/frontend_manager.cpp index 1e42de563ddbc6..31e643e7209bdb 100644 --- a/src/core/tests/frontend/frontend_manager.cpp +++ b/src/core/tests/frontend/frontend_manager.cpp @@ -479,3 +479,29 @@ TEST(FrontEndManagerTest, Exception_Safety_Input_Model_set_tensor_value) { TEST(FrontEndManagerTest, Exception_Safety_Input_Model_set_tensor_partial_value) { CHECK_EXCEPTION_INPUT_MODEL(input_model->set_tensor_partial_value({}, {}, {})) } + +#ifdef OPENVINO_CPP_VER_17 + +TEST(FrontEndManagerTest, testFEMDestroy_InputModelHolderUsingPath) { + InputModel::Ptr input_model; + { + std::shared_ptr model; + FrontEndManager fem; + fem.register_front_end("mock1", mock_fe_path()); + auto fe = fem.load_by_framework("mock1"); + input_model = fe->load(std::filesystem::path("test")); + model = fe->convert(input_model); + EXPECT_EQ(model->get_friendly_name(), "mock1_model"); + } + ASSERT_TRUE(input_model); +} + +TEST(FrontEndManagerTest, Exception_Safety_FrontEnd_Supported_By_Path) { + EXPECT_ANY_THROW({ + FrontEndManager fem; + fem.register_front_end("mock1", mock_fe_path()); + auto fe = fem.load_by_framework("mock1"); + fe->supported(std::filesystem::path("throw_now")); + }); +} +#endif diff --git a/src/core/tests/preprocess.cpp b/src/core/tests/preprocess.cpp index 0cec67c3031288..99f2789b217b6d 100644 --- a/src/core/tests/preprocess.cpp +++ b/src/core/tests/preprocess.cpp @@ -57,6 +57,12 @@ static std::shared_ptr create_n_inputs(element::Type type, const PartialS return std::make_shared(res, params); } +namespace { +void set_model_as_v10(ov::Model& model) { + model.get_rt_info()["version"] = static_cast(10); +} +} // namespace + TEST(pre_post_process, simple_mean_scale) { auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); auto p = PrePostProcessor(f); @@ -1531,7 +1537,7 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2}); auto name = f->output().get_node_shared_ptr()->get_friendly_name(); auto name_last_op = f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); - auto old_names = f->output().get_tensor().get_names(); + auto old_names 
= std::unordered_set{"tensor_output1"}; auto p = PrePostProcessor(f); p.output().postprocess().convert_element_type(element::u8); @@ -1539,7 +1545,6 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { EXPECT_EQ(f->get_results().size(), 1); EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); EXPECT_EQ(f->output().get_tensor().get_names(), old_names); - EXPECT_EQ(old_names.count("tensor_output1"), 1); auto ops = f->get_ordered_ops(); auto res_count = std::count_if(ops.begin(), ops.end(), [](const std::shared_ptr& n) { return std::dynamic_pointer_cast(n) != nullptr; @@ -1548,9 +1553,37 @@ TEST(pre_post_process, postprocess_convert_element_type_explicit) { auto names_count = std::count_if(ops.begin(), ops.end(), [](std::shared_ptr n) { return n->output(0).get_tensor().get_names().count("tensor_output1") > 0; }); - EXPECT_EQ(names_count, 2); // last node + result referencing to it + EXPECT_EQ(names_count, 2); // result + node connected to it has same name referencing to it EXPECT_EQ(name, f->output().get_node_shared_ptr()->get_friendly_name()); - EXPECT_EQ(name_last_op, + EXPECT_EQ(name_last_op + ".0", + f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name()); +} + +TEST(pre_post_process, trivial_model_convert_element_type_explicit) { + const auto f = create_trivial(element::f32, Shape{1, 3, 2, 2}); + const auto name = f->output().get_node_shared_ptr()->get_friendly_name(); + const auto name_last_op = + f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name(); + const auto old_names = std::unordered_set{"tensor_output1"}; + const auto n = f->output().get_tensor().get_names(); + auto p = PrePostProcessor(f); + + p.output().postprocess().convert_element_type(element::u8); + p.build(); + EXPECT_EQ(f->get_results().size(), 1); + EXPECT_EQ(f->get_results()[0]->get_element_type(), element::u8); + EXPECT_THAT(f->output().get_tensor().get_names(), old_names); + const auto ops = f->get_ordered_ops(); + const auto res_count = std::count_if(ops.begin(), ops.end(), [](const std::shared_ptr& n) { + return std::dynamic_pointer_cast(n) != nullptr; + }); + EXPECT_EQ(res_count, 1); + const auto names_count = std::count_if(ops.begin(), ops.end(), [](std::shared_ptr n) { + return n->output(0).get_tensor().get_names().count("tensor_output1") > 0; + }); + EXPECT_EQ(names_count, 2); // result + node connected to it has same name referencing to it + EXPECT_EQ(name, f->output().get_node_shared_ptr()->get_friendly_name()); + EXPECT_EQ(name_last_op + ".0", f->get_results().front()->get_input_source_output(0).get_node_shared_ptr()->get_friendly_name()); } @@ -1776,25 +1809,43 @@ TEST(pre_post_process, postprocess_convert_layout_invalid_dims_dyn_shape) { TEST(pre_post_process, postprocess_keep_friendly_names_compatibility) { auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); - auto result_fr_name = f->get_results()[0]->get_friendly_name(); - auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); - auto node_name = node_before_result_old->get_friendly_name(); + const auto result_fr_name = f->get_results()[0]->get_friendly_name(); + const auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_name = node_before_result_old->get_friendly_name(); + set_model_as_v10(*f); auto p = PrePostProcessor(f); p.output().postprocess().convert_element_type(element::u8); f = p.build(); 
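The updated tests capture the new default naming: when post-processing inserts a node in front of a Result, the inserted node receives the original producer's friendly name plus a "." and the output index (for example "relu.0"), and the original node keeps its own name. The previous behavior, where the name was moved onto the new node, is now only applied to models flagged as IR version < 11 in rt_info. A hedged sketch that mirrors the tests above (the model and the printed name are illustrative):

```cpp
#include <iostream>
#include <memory>

#include "openvino/core/preprocess/pre_post_process.hpp"
#include "openvino/openvino.hpp"

void postprocess_naming_example(const std::shared_ptr<ov::Model>& model) {
    // Uncomment to opt into the legacy naming, as the tests above do for IR v10 models:
    // model->get_rt_info()["version"] = static_cast<int64_t>(10);

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.output().postprocess().convert_element_type(ov::element::u8);
    ppp.build();

    const auto node_before_result =
        model->get_results()[0]->get_input_source_output(0).get_node_shared_ptr();
    // Default mode: e.g. "relu.0"; compatibility mode: the original "relu".
    std::cout << node_before_result->get_friendly_name() << std::endl;
}
```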
EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); - auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); // Compatibility check: verify that old name is assigned to new 'output' node EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name); // Compatibility check: Verify that old name is not set for old 'output' node anymore EXPECT_NE(node_before_result_old->get_friendly_name(), node_name); } +TEST(pre_post_process, postprocess_keep_friendly_names) { + auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); + auto result_fr_name = f->get_results()[0]->get_friendly_name(); + auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + auto node_name = node_before_result_old->get_friendly_name(); + auto p = PrePostProcessor(f); + p.output().postprocess().convert_element_type(element::u8); + f = p.build(); + EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); + auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + // Compatibility check: verify that old name + index is assigned to new 'output' node + EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name + ".0"); + // Compatibility check: Verify that old name is not changed + EXPECT_EQ(node_before_result_old->get_friendly_name(), node_name); +} + TEST(pre_post_process, postprocess_keep_friendly_names_compatibility_implicit) { auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); auto result_fr_name = f->get_results()[0]->get_friendly_name(); auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); auto node_name = node_before_result_old->get_friendly_name(); + set_model_as_v10(*f); auto p = PrePostProcessor(f); p.output().model().set_layout("NCHW"); p.output().tensor().set_layout("NHWC"); @@ -1807,6 +1858,21 @@ TEST(pre_post_process, postprocess_keep_friendly_names_compatibility_implicit) { EXPECT_NE(node_before_result_old->get_friendly_name(), node_name); } +TEST(pre_post_process, postprocess_keep_friendly_names_implicit) { + auto f = create_simple_function(element::f32, Shape{1, 3, 10, 10}); + const auto result_fr_name = f->get_results()[0]->get_friendly_name(); + const auto node_before_result_old = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + const auto node_name = node_before_result_old->get_friendly_name(); + auto p = PrePostProcessor(f); + p.output().model().set_layout("NCHW"); + p.output().postprocess().convert_layout("NHWC"); + f = p.build(); + EXPECT_EQ(f->get_results()[0]->get_friendly_name(), result_fr_name); + const auto node_before_result_new = f->get_results()[0]->get_input_source_output(0).get_node_shared_ptr(); + EXPECT_EQ(node_before_result_new->get_friendly_name(), node_name + ".0"); + EXPECT_EQ(node_before_result_old->get_friendly_name(), node_name); +} + // --- PostProcess - convert color format --- TEST(pre_post_process, postprocess_convert_color_format_BGR_RGB) { auto f = create_simple_function(element::f32, Shape{5, 30, 20, 3}); @@ -2017,7 +2083,11 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) { results.emplace_back(res); } auto model = std::make_shared(ResultVector{results}, ParameterVector{data1}); - EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 1); + // Set tensor name to model 
output 0 + model->output(0).set_names({"output_split0"}); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); + // Result input has still tensor_split0 names from split op + EXPECT_EQ(model->output(0).get_node()->get_input_tensor(0).get_names().count("tensor_Split0"), 1); EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); @@ -2026,9 +2096,12 @@ TEST(pre_post_process, postprocess_one_node_many_outputs) { p.output(2).tensor().set_element_type(element::f32); model = p.build(); EXPECT_EQ(model->get_results().size(), 3); - EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 1); + // Tensor names on output is lost as origin named tensor is before convert op + // New result has different precision means different tensor. + EXPECT_EQ(model->output(0).get_tensor().get_names().count("tensor_Split0"), 0); + EXPECT_EQ(model->output(0).get_tensor().get_names().count("output_split0"), 1); EXPECT_EQ(model->output(1).get_tensor().get_names().count("tensor_Split1"), 1); - EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 1); + EXPECT_EQ(model->output(2).get_tensor().get_names().count("tensor_Split2"), 0); EXPECT_EQ(model->get_results()[0]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.0"); EXPECT_EQ(model->get_results()[1]->input(0).get_source_output().get_node()->get_friendly_name(), "Split"); EXPECT_EQ(model->get_results()[2]->input(0).get_source_output().get_node()->get_friendly_name(), "Split.2"); diff --git a/src/frontends/common/include/openvino/frontend/frontend.hpp b/src/frontends/common/include/openvino/frontend/frontend.hpp index 0035382fe20c5f..bc944c17dbc0dd 100644 --- a/src/frontends/common/include/openvino/frontend/frontend.hpp +++ b/src/frontends/common/include/openvino/frontend/frontend.hpp @@ -15,6 +15,10 @@ #include "openvino/frontend/input_model.hpp" #include "openvino/frontend/visibility.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { namespace frontend { /// \brief An interface for identifying a frontend for a particular framework. @@ -50,7 +54,12 @@ class FRONTEND_API FrontEnd { /// \return true if model recognized, false - otherwise. template inline bool supported(const Types&... vars) const { - return supported_impl({ov::Any(vars)...}); +#ifdef OPENVINO_CPP_VER_17 + if constexpr ((std::is_same_v || ...)) { + return supported_impl({path_as_str_or_forward(vars)...}); + } else +#endif + return supported_impl({ov::Any(vars)...}); } inline bool supported(const ov::AnyVector& vars) const { return supported_impl(vars); @@ -65,7 +74,12 @@ class FRONTEND_API FrontEnd { /// \return Loaded input model. template inline InputModel::Ptr load(const Types&... 
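With the C++17 additions above, FrontEnd::supported(), FrontEnd::load(), and FrontEnd::add_extension() accept std::filesystem::path directly instead of requiring a pre-converted string. A minimal sketch under that assumption (the "ir" framework name and "model.xml" are placeholders, not taken from this change):

```cpp
#include <filesystem>

#include "openvino/frontend/manager.hpp"

int main() {
    ov::frontend::FrontEndManager manager;
    auto front_end = manager.load_by_framework("ir");  // placeholder framework name

    const std::filesystem::path model_path{"model.xml"};
    if (front_end && front_end->supported(model_path)) {
        auto input_model = front_end->load(model_path);   // std::filesystem::path accepted directly
        auto model = front_end->convert(input_model);
    }
    return 0;
}
```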
vars) const { - return load_impl({ov::Any{vars}...}); +#ifdef OPENVINO_CPP_VER_17 + if constexpr ((std::is_same_v || ...)) { + return load_impl({path_as_str_or_forward(vars)...}); + } else +#endif + return load_impl({ov::Any{vars}...}); } inline InputModel::Ptr load(const ov::AnyVector& vars) const { @@ -118,8 +132,16 @@ class FRONTEND_API FrontEnd { /// \brief Registers extension /// \param library_path path to library with ov::Extension + /// \{ void add_extension(const std::string& library_path); +#ifdef OPENVINO_CPP_VER_17 + void add_extension(const std::filesystem::path& library_path) { + add_extension(library_path.string()); + } +#endif + /// \} + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT /// \brief Registers extension @@ -162,6 +184,17 @@ class FRONTEND_API FrontEnd { private: static std::shared_ptr create_copy(const std::shared_ptr& ov_model, const std::shared_ptr& shared_object); + +#ifdef OPENVINO_CPP_VER_17 + template + static constexpr auto path_as_str_or_forward(T&& p) { + if constexpr (std::is_same_v>) { + return p.string(); + } else { + return std::forward(p); + } + } +#endif }; template <> diff --git a/src/frontends/tests/frontend/shared/src/conversion.cpp b/src/frontends/tests/frontend/shared/src/conversion.cpp index 34e4f2fd62719a..058d5534965436 100644 --- a/src/frontends/tests/frontend/shared/src/conversion.cpp +++ b/src/frontends/tests/frontend/shared/src/conversion.cpp @@ -95,3 +95,17 @@ TEST_P(FrontEndConversionExtensionTest, TestConversionExtensionViaSO) { OV_ASSERT_NO_THROW(model = frontend->convert(input_model)); ASSERT_NE(model, nullptr); } + +#ifdef OPENVINO_CPP_VER_17 +TEST_P(FrontEndConversionExtensionTest, TestConversionExtensionViaSOByPath) { + auto frontend = m_param.m_frontend; + const std::filesystem::path lib_path = get_lib_path("test_builtin_extensions"); + frontend->add_extension(lib_path); + std::shared_ptr input_model; + OV_ASSERT_NO_THROW(input_model = frontend->load(m_param.m_modelName)); + ASSERT_NE(input_model, nullptr); + std::shared_ptr model; + OV_ASSERT_NO_THROW(model = frontend->convert(input_model)); + ASSERT_NE(model, nullptr); +} +#endif diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 5674c75dd546d7..8baea3ed408656 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -801,6 +801,8 @@ struct EncryptionCallbacks { * when loading from the cache. This property is set in core.compile_model only. * - First value of the struct is encryption function. * - Second value of the struct is decryption function. + * @note GPU Plugin: encrypts whole blob, not only model structure. Only used when ov::cache_mode property is set to + * "OPTIMIZE_SIZE". * @ingroup ov_runtime_cpp_prop_api */ static constexpr Property cache_encryption_callbacks{ diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 673f6fd569a11e..f332c7c999a548 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -38,6 +38,18 @@ ov::ICore::~ICore() = default; +namespace ov { +namespace util { +template +constexpr std::array< + typename std::conditional::value, typename std::common_type::type, T>::type, + sizeof...(Args)> +make_array(Args&&... 
args) { + return {std::forward(args)...}; +} +} // namespace util +} // namespace ov + namespace { #ifdef PROXY_PLUGIN_ENABLED @@ -205,6 +217,18 @@ void clean_batch_properties(const std::string& deviceName, ov::AnyMap& config, c } } } + +static const auto core_properties_names = + ov::util::make_array(ov::cache_dir.name(), ov::enable_mmap.name(), ov::force_tbb_terminate.name()); + +static const auto auto_batch_properties_names = + ov::util::make_array(ov::auto_batch_timeout.name(), ov::hint::allow_auto_batching.name()); + +void remove_core_properties(ov::AnyMap& properties) { + for (const auto& name : core_properties_names) { + properties.erase(name); + } +} } // namespace bool ov::is_config_applicable(const std::string& user_device_name, const std::string& subprop_device_name) { @@ -239,22 +263,21 @@ bool ov::is_config_applicable(const std::string& user_device_name, const std::st return false; } -ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, - const AnyMap& config, - const bool keep_core_property) { +namespace { +ov::Parsed parse_device_config(const std::string& device_name, + const ov::CoreConfig& core_config, + const ov::AnyMap& properties, + const bool keep_auto_batch_property) { // check to the validity of device name - auto bracket_pos = deviceName.find(")"); + auto bracket_pos = device_name.find(")"); while (bracket_pos != std::string::npos) { - if (bracket_pos < deviceName.length() - 1 && - (deviceName[bracket_pos + 1] != ',' || bracket_pos + 1 == deviceName.length() - 1)) { - OPENVINO_THROW("Device with \"", deviceName, "\" name is illegal in the OpenVINO Runtime"); + if (bracket_pos < device_name.length() - 1 && + (device_name[bracket_pos + 1] != ',' || bracket_pos + 1 == device_name.length() - 1)) { + OPENVINO_THROW("Device with \"", device_name, "\" name is illegal in the OpenVINO Runtime"); } - bracket_pos = deviceName.find(")", bracket_pos + 1); + bracket_pos = device_name.find(")", bracket_pos + 1); } - auto updated_config = config; - auto updated_device_name = deviceName; - /** Note: auto-batching is already applied by this time, so the call: * core.compile_model("GPU", ov::device::properties("BATCH", ov::auto_batch_timeout(400))); * is transformed and we have here: @@ -268,17 +291,19 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, * So, if one day, we want to add more options in form of ov::allow_, we need to apply it before * 'flatten_sub_properties' call to have proper behavior */ + ov::Parsed parsed{device_name, flatten_sub_properties(device_name, properties), core_config}; + auto& updated_device_name = parsed._deviceName; + auto& updated_config = parsed._config; - updated_config = flatten_sub_properties(deviceName, updated_config); std::string parsed_device_priority; // try to find ':' to extract name of virtual device - auto pos = deviceName.find_first_of(':'); + auto pos = device_name.find_first_of(':'); if (pos != std::string::npos) { - updated_device_name = deviceName.substr(0, pos); - parsed_device_priority = deviceName.substr(pos + 1); + updated_device_name = device_name.substr(0, pos); + parsed_device_priority = device_name.substr(pos + 1); } else { - ov::DeviceIDParser parser(deviceName); + ov::DeviceIDParser parser(device_name); updated_device_name = parser.get_device_name(); parsed_device_priority = parser.get_device_id(); } @@ -295,20 +320,44 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, OPENVINO_THROW("Device priority / ID mismatch: ", parsed_device_priority, " (from ", - 
deviceName, + device_name, ") vs ", it->second.as(), " (from config)"); } }; + parsed._core_config.set(updated_config); // keep batch property only when called from query_supported_property - if (!keep_core_property) { - clean_batch_properties(updated_device_name, updated_config, ov::hint::allow_auto_batching); - clean_batch_properties(updated_device_name, updated_config, ov::auto_batch_timeout); + if (!keep_auto_batch_property) { + for (const auto& name : auto_batch_properties_names) { + clean_batch_properties(updated_device_name, updated_config, name); + } } + return parsed; +} +} // namespace + +ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, + const AnyMap& config, + const bool keep_auto_batch_property) { + return parseDeviceNameIntoConfig(deviceName, CoreConfig{}, config, keep_auto_batch_property); +} - return {std::move(updated_device_name), std::move(updated_config)}; +ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, + const CoreConfig& coreConfig, + const AnyMap& config, + const bool keep_auto_batch_property) { + auto parsed = parse_device_config(deviceName, coreConfig, config, keep_auto_batch_property); + + // remove core properties for HW devices + if (!is_virtual_device(parsed._deviceName)) { + for (const auto& name : {ov::enable_mmap.name(), ov::force_tbb_terminate.name()}) { + // note: ov::cache_dir kept as plugin may require it + parsed._config.erase(name); + } + } + return parsed; } ov::CoreImpl::CoreImpl() { @@ -663,8 +712,7 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { { OPENVINO_SUPPRESS_DEPRECATED_START if (device_supports_cache_dir(plugin)) { - ov::AnyMap empty_map; - auto cacheConfig = coreConfig.get_cache_config_for_device(plugin, empty_map); + auto cacheConfig = coreConfig.get_cache_config_for_device(plugin); if (cacheConfig._cacheManager) { desc.defaultConfig[ov::cache_dir.name()] = cacheConfig._cacheDir; } @@ -737,13 +785,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< // if auto-batching is applicable, the below function will patch the device name and config accordingly: auto model = apply_auto_batching(model_, deviceName, config_with_batch); - auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(device_name)); + auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName)); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr res; - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. 
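The parsing changes above strip core-only properties (ov::enable_mmap, ov::force_tbb_terminate) from the configuration handed to hardware plugins, while ov::cache_dir is kept because a plugin may still consume it. From the application side these remain ordinary Core-level properties; a minimal sketch, with placeholder values:

```cpp
#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // Core-level switches: consumed by the Core itself, not forwarded to hardware plugins.
    core.set_property(ov::enable_mmap(false));          // read models and cache blobs without memory mapping
    core.set_property(ov::force_tbb_terminate(true));   // forwarded to the threading executor manager

    auto compiled = core.compile_model("model.xml", "CPU");  // placeholder model and device
    return 0;
}
```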
HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); res = load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr{}, [&]() { @@ -770,13 +819,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::shared_ptr< // if auto-batching is applicable, the below function will patch the device name and config accordingly: auto model = apply_auto_batching(model_, deviceName, config_with_batch); - auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch, is_proxy_device(deviceName)); + auto parsed = parseDeviceNameIntoConfig(deviceName, coreConfig, config_with_batch, is_proxy_device(deviceName)); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr res; - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); res = load_model_from_cache(cacheContent, plugin, parsed._config, context, [&]() { @@ -792,21 +842,22 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod const std::string& device_name, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Path"); - auto parsed = parseDeviceNameIntoConfig(device_name, config); + auto parsed = parseDeviceNameIntoConfig(device_name, coreConfig, config); // in case of compile_model(file_name), we need to clear-up core-level properties auto plugin = get_plugin(parsed._deviceName); ov::SoPtr compiled_model; - - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { // Skip caching for proxy plugin. 
HW plugin will load network from the cache - CacheContent cacheContent{cacheManager, model_path}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap(), model_path}; cacheContent.blobId = ov::ModelCache::compute_hash(model_path, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); compiled_model = load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr{}, [&]() { - auto model = read_model(model_path, std::string{}); + auto model = + ov::util::read_model(model_path, std::string{}, extensions, parsed._core_config.get_enable_mmap()); return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent); }); } else { @@ -820,15 +871,14 @@ ov::SoPtr ov::CoreImpl::compile_model(const std::string& mod const std::string& device_name, const ov::AnyMap& config) const { OV_ITT_SCOPED_TASK(ov::itt::domains::OV, "Core::compile_model::from_memory"); - auto parsed = parseDeviceNameIntoConfig(device_name, config); - // in case of compile_model(file_name), we need to clear-up core-level properties + auto parsed = parseDeviceNameIntoConfig(device_name, coreConfig, config); auto plugin = get_plugin(parsed._deviceName); ov::SoPtr compiled_model; - - auto cacheManager = coreConfig.get_cache_config_for_device(plugin, parsed._config)._cacheManager; + // will consume ov::cache_dir if plugin not support it + auto cacheManager = parsed._core_config.get_cache_config_for_device(plugin, parsed._config)._cacheManager; // Skip caching for proxy plugin. HW plugin will load network from the cache if (cacheManager && device_supports_model_caching(plugin) && !is_proxy_device(plugin)) { - CacheContent cacheContent{cacheManager}; + CacheContent cacheContent{cacheManager, parsed._core_config.get_enable_mmap()}; cacheContent.blobId = ov::ModelCache::compute_hash(model_str, weights, create_compile_config(plugin, parsed._config)); std::unique_ptr lock = cacheGuard.get_hash_lock(cacheContent.blobId); @@ -948,7 +998,7 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n // ov::device::priority cannot be shared, because it's specific for current virtual // plugin. 
So, we need to remove ov::device::priorities from the list, because it's // supposed to be set for current virtual plugin and cannot be propagated down - ov::AnyMap return_properties = user_properties; + auto return_properties = user_properties; auto device_priorities_it = return_properties.find(ov::device::priorities.name()); if (device_priorities_it != return_properties.end()) { return_properties.erase(device_priorities_it); @@ -957,30 +1007,24 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n return return_properties; } - static const std::vector core_level_properties = { - ov::cache_dir.name(), - ov::force_tbb_terminate.name(), - // auto-batch properties are also treated as core-level - ov::auto_batch_timeout.name(), - ov::hint::allow_auto_batching.name(), - }; - - const auto flattened = ov::parseDeviceNameIntoConfig(full_device_name, user_properties, true); - const std::string& device_name = flattened._deviceName; + const auto flattened = parse_device_config(full_device_name, {}, user_properties, keep_core_property); const auto& flattened_config = flattened._config; + const auto& device_name = flattened._deviceName; // virtual plugins should bypass core-level properties to HW plugins // so, we need to report them as supported std::vector supported_config_keys; + auto key_inserter = std::back_inserter(supported_config_keys); if (keep_core_property) { - supported_config_keys = core_level_properties; + key_inserter = std::copy(core_properties_names.begin(), core_properties_names.end(), key_inserter); + key_inserter = std::copy(auto_batch_properties_names.begin(), auto_batch_properties_names.end(), key_inserter); } // try to search against OV API 2.0' mutable supported_properties try { for (auto&& property : ICore::get_property(device_name, ov::supported_properties, {})) { if (property.is_mutable()) { - supported_config_keys.emplace_back(std::move(property)); + *key_inserter = std::move(property); } } } catch (ov::Exception&) { @@ -990,7 +1034,7 @@ ov::AnyMap ov::CoreImpl::get_supported_property(const std::string& full_device_n try { for (auto&& property : ICore::get_property(device_name, ov::internal::supported_properties, {})) { if (property.is_mutable()) { - supported_config_keys.emplace_back(std::move(property)); + *key_inserter = std::move(property); } } } catch (ov::Exception&) { @@ -1160,8 +1204,7 @@ ov::Any ov::CoreImpl::get_property(const std::string& device_name, if (parsed._deviceName.empty()) { return get_property_for_core(name); } else if (name == ov::cache_dir.name()) { - ov::AnyMap empty_map; - return coreConfig.get_cache_config_for_device(get_plugin(parsed._deviceName), empty_map)._cacheDir; + return coreConfig.get_cache_config_for_device(get_plugin(parsed._deviceName))._cacheDir; } return get_plugin(parsed._deviceName).get_property(name, parsed._config); } @@ -1299,9 +1342,7 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st { OPENVINO_SUPPRESS_DEPRECATED_START if (device_supports_cache_dir(plugin.second)) { - ov::AnyMap empty_map = {}; - configCopy[ov::cache_dir.name()] = - coreConfig.get_cache_config_for_device(plugin.second, empty_map)._cacheDir; + configCopy[ov::cache_dir.name()] = coreConfig.get_cache_config_for_device(plugin.second)._cacheDir; } else if (configCopy.count(ov::cache_dir.name()) > 0) { // Remove "CACHE_DIR" from config if it is not supported by plugin configCopy.erase(ov::cache_dir.name()); @@ -1411,8 +1452,8 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( try { 
cacheContent.cacheManager->read_cache_entry( cacheContent.blobId, - coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties), - ov::internal::caching_with_mmap), + cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::caching_with_mmap), [&](std::istream& networkStream, std::shared_ptr model_buffer) { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, @@ -1516,7 +1557,16 @@ ov::AnyMap ov::CoreImpl::create_compile_config(const ov::Plugin& plugin, const o return compile_config; } -void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) { +ov::CoreConfig::CoreConfig(const CoreConfig& other) { + { + std::lock_guard lock(other._cacheConfigMutex); + _cacheConfig = other._cacheConfig; + _cacheConfigPerDevice = other._cacheConfigPerDevice; + } + _flag_enable_mmap = other._flag_enable_mmap; +} + +void ov::CoreConfig::set(const ov::AnyMap& config) { auto it = config.find(ov::cache_dir.name()); if (it != config.end()) { std::lock_guard lock(_cacheConfigMutex); @@ -1526,43 +1576,44 @@ void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) { for (auto& deviceCfg : _cacheConfigPerDevice) { deviceCfg.second = CoreConfig::CacheConfig::create(it->second.as()); } - config.erase(it); } it = config.find(ov::force_tbb_terminate.name()); if (it != config.end()) { auto flag = it->second.as(); ov::threading::executor_manager()->set_property({{it->first, flag}}); - config.erase(it); } it = config.find(ov::enable_mmap.name()); if (it != config.end()) { auto flag = it->second.as(); _flag_enable_mmap = flag; - config.erase(it); } } -void ov::CoreImpl::CoreConfig::set_cache_dir_for_device(const std::string& dir, const std::string& name) { +void ov::CoreConfig::set_and_update(ov::AnyMap& config) { + set(config); + remove_core_properties(config); +} + +void ov::CoreConfig::set_cache_dir_for_device(const std::string& dir, const std::string& name) { std::lock_guard lock(_cacheConfigMutex); _cacheConfigPerDevice[name] = CoreConfig::CacheConfig::create(dir); } -std::string ov::CoreImpl::CoreConfig::get_cache_dir() const { +std::string ov::CoreConfig::get_cache_dir() const { std::lock_guard lock(_cacheConfigMutex); return _cacheConfig._cacheDir; } -bool ov::CoreImpl::CoreConfig::get_enable_mmap() const { +bool ov::CoreConfig::get_enable_mmap() const { return _flag_enable_mmap; } // Creating thread-safe copy of config including shared_ptr to ICacheManager // Passing empty or not-existing name will return global cache config -ov::CoreImpl::CoreConfig::CacheConfig ov::CoreImpl::CoreConfig::get_cache_config_for_device( - const ov::Plugin& plugin, - ov::AnyMap& parsedConfig) const { +ov::CoreConfig::CacheConfig ov::CoreConfig::get_cache_config_for_device(const ov::Plugin& plugin, + ov::AnyMap& parsedConfig) const { // cache_dir is enabled locally in compile_model only if (parsedConfig.count(ov::cache_dir.name())) { const auto& cache_dir_val = parsedConfig.at(ov::cache_dir.name()).as(); @@ -1575,16 +1626,16 @@ ov::CoreImpl::CoreConfig::CacheConfig ov::CoreImpl::CoreConfig::get_cache_config } return tempConfig; } else { // cache_dir is set to Core globally or for the specific device - std::lock_guard lock(_cacheConfigMutex); - if (_cacheConfigPerDevice.count(plugin.get_name()) > 0) { - return _cacheConfigPerDevice.at(plugin.get_name()); - } else { - return _cacheConfig; - } + return get_cache_config_for_device(plugin); } } -ov::CoreImpl::CoreConfig::CacheConfig 
ov::CoreImpl::CoreConfig::CacheConfig::create(const std::string& dir) { +ov::CoreConfig::CacheConfig ov::CoreConfig::get_cache_config_for_device(const ov::Plugin& plugin) const { + std::lock_guard lock(_cacheConfigMutex); + return _cacheConfigPerDevice.count(plugin.get_name()) ? _cacheConfigPerDevice.at(plugin.get_name()) : _cacheConfig; +} + +ov::CoreConfig::CacheConfig ov::CoreConfig::CacheConfig::create(const std::string& dir) { std::shared_ptr cache_manager = nullptr; if (!dir.empty()) { diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 7cf12f3ba3280c..7bbab14e4d8c14 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -22,14 +22,91 @@ using CreatePluginEngineFunc = void(std::shared_ptr<::ov::IPlugin>&); const std::string DEFAULT_DEVICE_NAME = "DEFAULT_DEVICE"; +class CoreConfig final { +public: + CoreConfig() = default; + CoreConfig(const CoreConfig& other); + CoreConfig& operator=(const CoreConfig&) = delete; + + struct CacheConfig { + std::string _cacheDir; + std::shared_ptr _cacheManager; + + static CacheConfig create(const std::string& dir); + }; + + void set(const ov::AnyMap& config); + + /** + * @brief Removes core-level properties from config and triggers new state for core config + * @param config - config to be updated + */ + void set_and_update(ov::AnyMap& config); + + OPENVINO_DEPRECATED("Don't use this method, it will be removed soon") + void set_cache_dir_for_device(const std::string& dir, const std::string& name); + + std::string get_cache_dir() const; + + bool get_enable_mmap() const; + + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; + + // Creating thread-safe copy of global config including shared_ptr to ICacheManager + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin) const; + +private: + mutable std::mutex _cacheConfigMutex; + CacheConfig _cacheConfig; + std::map _cacheConfigPerDevice; + bool _flag_enable_mmap = true; +}; + struct Parsed { std::string _deviceName; AnyMap _config; + CoreConfig _core_config; }; +/** + * @brief Provides Parsed device name and configuration. + * + * Uses default core configuration updated with user properties from config. + * The core properties are removed from user configuration for HW devices only. + * @note The `CACHE_DIR` is not removed from compiled configuration. + * + * @param deviceName Device name to be parsed + * @param config User configuration to be parsed. + * @param keep_auto_batch_property If set keep auto batch properties in compile properties. + * @return Parsed: + * - device name + * - compile properties + * - core configuration + */ +Parsed parseDeviceNameIntoConfig(const std::string& deviceName, + const AnyMap& config = {}, + const bool keep_auto_batch_property = false); + +/** + * @brief Provides Parsed device name and configuration. + * + * Uses user core configuration which is updated with user properties from config. + * The core properties are removed from user configuration for HW devices only. + * @note The `CACHE_DIR` is not removed from compiled configuration. + * + * @param deviceName Device name to be parsed + * @param coreConfig Core configuration used as base for parsed output. + * @param config User configuration to be parsed. + * @param keep_auto_batch_property If set keep auto batch properties in compile properties. 
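The extracted CoreConfig keeps one global cache configuration plus optional per-device overrides, and a cache_dir passed to compile_model applies to that compilation only. A hedged sketch of how this looks from the public API (device name and directories are placeholders):

```cpp
#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    // Global cache directory, used by devices that support model caching.
    core.set_property(ov::cache_dir("./ov_cache"));
    // Per-device override, stored in the per-device cache configuration.
    core.set_property("GPU", ov::cache_dir("./ov_cache_gpu"));
    // A cache_dir passed to compile_model is enabled locally for this compilation only.
    auto compiled = core.compile_model("model.xml", "CPU", ov::cache_dir("./ov_cache_local"));
    return 0;
}
```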
+ * @return Parsed: + * - device name + * - compile properties + * - core configuration + */ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, + const CoreConfig& coreConfig, const AnyMap& config = {}, - const bool keep_core_property = false); + const bool keep_auto_batch_property = false); /** * @brief Checks whether config is applicable for device with 'device_name' @@ -61,47 +138,17 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this _cacheManager; - - static CacheConfig create(const std::string& dir); - }; - - /** - * @brief Removes core-level properties from config and triggers new state for core config - * @param config - config to be updated - */ - void set_and_update(ov::AnyMap& config); - - OPENVINO_DEPRECATED("Don't use this method, it will be removed soon") - void set_cache_dir_for_device(const std::string& dir, const std::string& name); - - std::string get_cache_dir() const; - - bool get_enable_mmap() const; - - // Creating thread-safe copy of config including shared_ptr to ICacheManager - // Passing empty or not-existing name will return global cache config - CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; - - private: - mutable std::mutex _cacheConfigMutex; - CacheConfig _cacheConfig; - std::map _cacheConfigPerDevice; - bool _flag_enable_mmap = true; - }; - struct CacheContent { explicit CacheContent(const std::shared_ptr& cache_manager, + bool mmap_enabled = false, const std::string model_path = {}) : cacheManager(cache_manager), - modelPath(model_path) {} + modelPath(model_path), + mmap_enabled{mmap_enabled} {} std::shared_ptr cacheManager; std::string blobId = {}; std::string modelPath = {}; + bool mmap_enabled = false; }; // Core settings (cache config, etc) @@ -291,7 +338,9 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this create_context(const std::string& device_name, const AnyMap& args) const override; - ov::AnyMap get_supported_property(const std::string& device_name, const ov::AnyMap& config, const bool keep_core_property = true) const override; + ov::AnyMap get_supported_property(const std::string& device_name, + const ov::AnyMap& config, + const bool keep_core_property = true) const override; ov::SoPtr get_default_context(const std::string& device_name) const override; diff --git a/src/plugins/auto/tests/functional/behavior/caching_test.cpp b/src/plugins/auto/tests/functional/behavior/caching_test.cpp index 1ef107cd59991f..196d2519250a5d 100644 --- a/src/plugins/auto/tests/functional/behavior/caching_test.cpp +++ b/src/plugins/auto/tests/functional/behavior/caching_test.cpp @@ -190,4 +190,4 @@ TEST_F(AutoFuncTests, compiled_with_cache_enabled_batch_enabled) { ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 5); core.set_property(ov::cache_dir("")); #endif -} \ No newline at end of file +} diff --git a/src/plugins/auto_batch/src/plugin.hpp b/src/plugins/auto_batch/src/plugin.hpp index 37a777cc970b6a..563ba4487ee3ec 100644 --- a/src/plugins/auto_batch/src/plugin.hpp +++ b/src/plugins/auto_batch/src/plugin.hpp @@ -68,4 +68,4 @@ class Plugin : public ov::IPlugin { mutable ov::AnyMap m_plugin_config; }; } // namespace autobatch_plugin -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp index 2e8bde43abeed4..8e1be37c91b1ef 100644 --- 
a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/binary_buffer.hpp @@ -20,12 +20,17 @@ class BinaryOutputBuffer : public OutputBuffer { BinaryOutputBuffer(std::ostream& stream) : OutputBuffer(this), stream(stream), _impl_params(nullptr), _strm(nullptr) {} - void write(void const * data, std::streamsize size) { + virtual ~BinaryOutputBuffer() = default; + + virtual void write(void const* data, std::streamsize size) { auto const written_size = stream.rdbuf()->sputn(reinterpret_cast(data), size); OPENVINO_ASSERT(written_size == size, - "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size)); + "[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + + std::to_string(written_size)); } + virtual void flush() {} + void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } void set_stream(void* strm) { _strm = strm; } @@ -42,7 +47,9 @@ class BinaryInputBuffer : public InputBuffer { BinaryInputBuffer(std::istream& stream, engine& engine) : InputBuffer(this, engine), _stream(stream), _impl_params(nullptr) {} - void read(void* const data, std::streamsize size) { + virtual ~BinaryInputBuffer() = default; + + virtual void read(void* const data, std::streamsize size) { auto const read_size = _stream.rdbuf()->sgetn(reinterpret_cast(data), size); OPENVINO_ASSERT(read_size == size, "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size)); @@ -51,14 +58,73 @@ class BinaryInputBuffer : public InputBuffer { void setKernelImplParams(void* impl_params) { _impl_params = impl_params; } void* getKernelImplParams() const { return _impl_params; } - std::streampos tellg() { return _stream.tellg(); } - void seekg(std::streampos pos) { _stream.seekg(pos); } - private: std::istream& _stream; void* _impl_params; }; +class EncryptedBinaryOutputBuffer : public BinaryOutputBuffer { +public: + EncryptedBinaryOutputBuffer(std::ostream& stream, std::function encrypt) + : BinaryOutputBuffer(stream), + encrypt(encrypt) { + OPENVINO_ASSERT(encrypt); + } + + ~EncryptedBinaryOutputBuffer() override = default; + + void write(void const* data, std::streamsize size) override { + plaintext_str.append(reinterpret_cast(data), size); + } + + void flush() override { + auto encrypted_str = encrypt(plaintext_str); + size_t bytes = encrypted_str.size(); + BinaryOutputBuffer::write(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes)); + BinaryOutputBuffer::write(make_data(encrypted_str.c_str(), encrypted_str.size()).data, encrypted_str.size()); + } + +private: + std::string + plaintext_str; // Not using stringstream here because passing to encrypt() would produce an additional copy. + std::function encrypt; +}; + +class EncryptedBinaryInputBuffer : public BinaryInputBuffer { +public: + EncryptedBinaryInputBuffer(std::istream& stream, + engine& engine, + std::function decrypt) + : BinaryInputBuffer(stream, engine), + decrypt(decrypt) { + OPENVINO_ASSERT(decrypt); + + size_t bytes; + BinaryInputBuffer::read(make_data(&bytes, sizeof(bytes)).data, sizeof(bytes)); + + // Not reading directly to plaintext_stream because decrypt(plaintext_stream.str()) would create an additional + // copy. 
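The encrypted buffers introduced here write a size-prefixed encrypted blob on export and decrypt it back into an in-memory stream on import. From the application side this is driven by ov::cache_encryption_callbacks together with ov::CacheMode::OPTIMIZE_SIZE, as the properties note earlier states for the GPU plugin. A hedged sketch with placeholder callbacks (the identity lambdas stand in for a real cipher):

```cpp
#include <string>

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("./ov_cache"));  // placeholder cache directory

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = [](const std::string& plain) { /* return ciphertext */ return plain; };
    callbacks.decrypt = [](const std::string& cipher) { /* return plaintext */ return cipher; };

    // For GPU the whole exported blob is encrypted, and only when OPTIMIZE_SIZE is used.
    auto compiled = core.compile_model("model.xml",
                                       "GPU",
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                       ov::cache_encryption_callbacks(callbacks));
    return 0;
}
```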
+        std::string str(bytes, 0);
+        BinaryInputBuffer::read(
+            make_data(const_cast<char*>(reinterpret_cast<const char*>(str.c_str())), str.size()).data,
+            str.size());
+        plaintext_stream.str(decrypt(str));
+    }
+
+    ~EncryptedBinaryInputBuffer() override = default;
+
+    void read(void* const data, std::streamsize size) override {
+        auto const read_size = plaintext_stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
+        OPENVINO_ASSERT(
+            read_size == size,
+            "[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
+    }
+
+private:
+    std::stringstream plaintext_stream;
+    std::function<std::string(const std::string&)> decrypt;
+};
+
 template <typename T>
 class Serializer<BinaryOutputBuffer, T, typename std::enable_if<std::is_class<T>::value>::type> {
 public:
diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index 18e7a88fc42f3e..810353fe626c19 100644
--- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -179,7 +179,16 @@ void CompiledModel::export_model(std::ostream& model) const {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::export_model");
     OPENVINO_ASSERT(!m_graphs.empty(), "[GPU] Model not loaded");
 
-    cldnn::BinaryOutputBuffer ob(model);
+    const ov::EncryptionCallbacks encryption_callbacks = m_config.get_property(ov::cache_encryption_callbacks);
+
+    // Do not allow encryption for CacheMode::OPTIMIZE_SPEED - the cache size may cause severe memory penalty.
+    const bool encryption_enabled = encryption_callbacks.encrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
+    std::unique_ptr<cldnn::BinaryOutputBuffer> ob_ptr =
+        encryption_enabled
+            ? cldnn::make_unique<cldnn::EncryptedBinaryOutputBuffer>(model, encryption_callbacks.encrypt)
+            : cldnn::make_unique<cldnn::BinaryOutputBuffer>(model);
+    auto& ob = *ob_ptr;
+
     ob << cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
 
     // Inputs
@@ -222,6 +231,7 @@ void CompiledModel::export_model(std::ostream& model) const {
     }
 
     get_graph(0)->export_model(ob);
+    ob.flush();
 }
 
 std::shared_ptr<ov::Model> CompiledModel::get_runtime_model() const {
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index f2fa9bcdeeab1b..f6c15bc2e8943a 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -339,12 +339,21 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model,
     config.set_user_property(_orig_config);
     config.apply_user_properties(context_impl->get_engine().get_device_info());
 
-    cldnn::BinaryInputBuffer ib(model, context_impl->get_engine());
+    ov::CacheMode cache_mode = config.get_property(ov::cache_mode);
+    ov::EncryptionCallbacks encryption_callbacks = config.get_property(ov::cache_encryption_callbacks);
+    const bool encryption_enabled = encryption_callbacks.decrypt && cache_mode == ov::CacheMode::OPTIMIZE_SIZE;
 
-    ov::CacheMode cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
-    ib >> cldnn::make_data(&cache_mode, sizeof(ov::CacheMode));
+    std::unique_ptr<cldnn::BinaryInputBuffer> ib_ptr =
+        encryption_enabled ? cldnn::make_unique<cldnn::EncryptedBinaryInputBuffer>(model,
+                                                                                   context_impl->get_engine(),
+                                                                                   encryption_callbacks.decrypt)
+                           : cldnn::make_unique<cldnn::BinaryInputBuffer>(model, context_impl->get_engine());
+    auto& ib = *ib_ptr;
 
-    if (cache_mode != config.get_property(ov::cache_mode)) {
+    ov::CacheMode loaded_cache_mode = ov::CacheMode::OPTIMIZE_SPEED;
+    ib >> cldnn::make_data(&loaded_cache_mode, sizeof(ov::CacheMode));
+
+    if (loaded_cache_mode != cache_mode) {
         return nullptr;
     }
 
@@ -608,6 +617,8 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
         ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::cache_encryption_callbacks.name(), PropertyMutability::RW},
+        ov::PropertyName{ov::hint::kv_cache_precision.name(), PropertyMutability::RW},
     };
 
     return supported_properties;
diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
index 17e1ed6d0a9bbe..1f911d4a0f2070 100644
--- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
+++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -13,21 +13,24 @@
 #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
 #include "common_test_utils/test_common.hpp"
 #include "openvino/pass/serialize.hpp"
+#include "openvino/util/codec_xor.hpp"
 
 namespace {
-typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
+typedef std::tuple<bool, bool, ov::element::Type, ov::element::Type> testParams;
 
 class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
 public:
     static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
         bool use_compile_model_api_;
+        bool do_encryption_;
         ov::element::Type inference_mode_;
         ov::element::Type model_dtype_;
-        std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
+        std::tie(use_compile_model_api_, do_encryption_, inference_mode_, model_dtype_) = obj.param;
 
         std::ostringstream result;
         const char separator = '_';
         result << "use_compile_model_api=" << use_compile_model_api_ << separator;
+        result << "_do_encryption=" << do_encryption_;
         result << "inference_mode=" << inference_mode_ << separator;
         result << "model_dtype=" << model_dtype_;
         return result.str();
@@ -40,6 +43,7 @@ class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::W
     std::string cache_path;
     std::string cache_dir;
     bool use_compile_model_api;  // for loading from cache
+    bool do_encryption;
     ov::element::Type inference_mode;
     ov::element::Type model_dtype;
 
@@ -55,7 +59,7 @@ void CheckWeightlessCacheAccuracy::SetUp() {
     cache_path = filePrefix + ".blob";
     cache_dir = filePrefix + "_cache_dir";
 
-    std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
+    std::tie(use_compile_model_api, do_encryption, inference_mode, model_dtype) = GetParam();
 }
 
 void CheckWeightlessCacheAccuracy::TearDown() {
@@ -75,6 +79,14 @@ void CheckWeightlessCacheAccuracy::run() {
     ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                            ov::weights_path(bin_path),
                                            ov::hint::inference_precision(inference_mode)};
+
+    if (do_encryption) {
+        ov::EncryptionCallbacks encryption_callbacks;
+        encryption_callbacks.encrypt = ov::util::codec_xor;
+        encryption_callbacks.decrypt = ov::util::codec_xor;
+        config.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+        config_with_weights_path.insert(ov::cache_encryption_callbacks(encryption_callbacks));
+    }
     auto core = ov::test::utils::PluginCache::get().core();
 
     ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
@@ -150,6 +162,7 @@ const std::vector<ov::element::Type> model_dtypes = {
 INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
                          CheckWeightlessCacheAccuracy,
                          ::testing::Combine(::testing::Bool(),
+                                            ::testing::Bool(),
                                             ::testing::ValuesIn(inference_modes),
                                             ::testing::ValuesIn(model_dtypes)),
                          CheckWeightlessCacheAccuracy::get_test_case_name);
diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp
index f66df99c7b1c43..20d72f0fad5a60 100644
--- a/src/plugins/template/src/plugin.cpp
+++ b/src/plugins/template/src/plugin.cpp
@@ -257,15 +257,18 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const
         return ro_properties;
     };
     const auto& default_rw_properties = []() {
-        std::vector<ov::PropertyName> rw_properties{ov::device::id,
-                                                    ov::enable_profiling,
-                                                    ov::hint::performance_mode,
-                                                    ov::hint::num_requests,
-                                                    ov::hint::inference_precision,
-                                                    ov::hint::execution_mode,
-                                                    ov::num_streams,
-                                                    ov::template_plugin::disable_transformations,
-                                                    ov::log::level};
+        std::vector<ov::PropertyName> rw_properties{
+            ov::device::id,
+            ov::enable_profiling,
+            ov::hint::performance_mode,
+            ov::hint::num_requests,
+            ov::hint::inference_precision,
+            ov::hint::execution_mode,
+            ov::num_streams,
+            ov::template_plugin::disable_transformations,
+            ov::log::level,
+            ov::hint::model_priority,
+        };
         return rw_properties;
     };
     if (ov::supported_properties == name) {
@@ -280,7 +283,9 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const
     } else if (ov::internal::supported_properties == name) {
         return decltype(ov::internal::supported_properties)::value_type{
             ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
-            ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
+            ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
+            ov::PropertyName{ov::inference_num_threads.name(), ov::PropertyMutability::RW},
+            ov::PropertyName{ov::internal::threads_per_stream.name(), ov::PropertyMutability::RW}};
     } else if (ov::available_devices == name) {
         // TODO: fill list of available devices
         return decltype(ov::available_devices)::value_type{{""}};
diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp
index 76b110e9a5e655..26ba7f59245c2f 100644
--- a/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/ov_infer_request/properties_tests.hpp
@@ -121,6 +121,19 @@ TEST_P(InferRequestPropertiesTest, ReusableCPUStreamsExecutor) {
         }
     }
 }
+
+TEST_P(InferRequestPropertiesTest, ConfigHasUnsupportedPluginProperty) {
+    configuration.insert({ov::enable_mmap(false)});
+    if (target_device.find(ov::test::utils::DEVICE_AUTO) == std::string::npos &&
+        target_device.find(ov::test::utils::DEVICE_MULTI) == std::string::npos &&
+        target_device.find(ov::test::utils::DEVICE_HETERO) == std::string::npos &&
+        target_device.find(ov::test::utils::DEVICE_BATCH) == std::string::npos) {
+        OV_ASSERT_NO_THROW(core->set_property(target_device, configuration));
+    }
+    // Compile model to target plugins
+    execNet = core->compile_model(function, target_device, configuration);
+    OV_ASSERT_NO_THROW(execNet.create_infer_request());
+}
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov
diff --git a/tools/benchmark_tool/openvino/__init__.py b/tools/benchmark_tool/openvino/__init__.py
index e4d1a247520332..69c678909b1c9e 100644
--- a/tools/benchmark_tool/openvino/__init__.py
+++ b/tools/benchmark_tool/openvino/__init__.py
@@ -27,11 +27,11 @@ from openvino import properties as properties
 
 # Import most important classes and functions from openvino.runtime
-from openvino.runtime import Model
-from openvino.runtime import Core
-from openvino.runtime import CompiledModel
-from openvino.runtime import InferRequest
-from openvino.runtime import AsyncInferQueue
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
 
 from openvino.runtime import Symbol
 from openvino.runtime import Dimension
@@ -43,12 +43,13 @@ from openvino.runtime import Tensor
 from openvino.runtime import OVAny
-from openvino.runtime import compile_model
+# Helper functions for openvino module
+from openvino.runtime.utils.data_helpers import tensor_from_file
+from openvino._ov_api import compile_model
 from openvino.runtime import get_batch
 from openvino.runtime import set_batch
 from openvino.runtime import serialize
 from openvino.runtime import shutdown
-from openvino.runtime import tensor_from_file
 from openvino.runtime import save_model
 from openvino.runtime import layout_helpers
diff --git a/tools/ovc/openvino/__init__.py b/tools/ovc/openvino/__init__.py
index e4d1a247520332..69c678909b1c9e 100644
--- a/tools/ovc/openvino/__init__.py
+++ b/tools/ovc/openvino/__init__.py
@@ -27,11 +27,11 @@ from openvino import properties as properties
 
 # Import most important classes and functions from openvino.runtime
-from openvino.runtime import Model
-from openvino.runtime import Core
-from openvino.runtime import CompiledModel
-from openvino.runtime import InferRequest
-from openvino.runtime import AsyncInferQueue
+from openvino._ov_api import Model
+from openvino._ov_api import Core
+from openvino._ov_api import CompiledModel
+from openvino._ov_api import InferRequest
+from openvino._ov_api import AsyncInferQueue
 
 from openvino.runtime import Symbol
 from openvino.runtime import Dimension
@@ -43,12 +43,13 @@ from openvino.runtime import Tensor
 from openvino.runtime import OVAny
-from openvino.runtime import compile_model
+# Helper functions for openvino module
+from openvino.runtime.utils.data_helpers import tensor_from_file
+from openvino._ov_api import compile_model
 from openvino.runtime import get_batch
 from openvino.runtime import set_batch
 from openvino.runtime import serialize
 from openvino.runtime import shutdown
-from openvino.runtime import tensor_from_file
 from openvino.runtime import save_model
 from openvino.runtime import layout_helpers
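For reference, a minimal consumer-side sketch of the cache-encryption flow introduced above (not part of the patch): the GPU plugin now honors ov::cache_encryption_callbacks in export_model()/import_model(), and, per the comment in compiled_model.cpp, only when the cache mode is ov::CacheMode::OPTIMIZE_SIZE. The model path, the "GPU" device string, and the toy XOR codec below are illustrative placeholders; the functional test above uses ov::util::codec_xor, and a real application would plug in its own cipher.

// Hypothetical usage sketch; "model.xml", the cache directory name, and the
// XOR "cipher" are placeholders, not part of this patch.
#include <openvino/openvino.hpp>

#include <string>

int main() {
    ov::Core core;

    // Toy symmetric transform standing in for a real cipher. Any pair of
    // std::string -> std::string callbacks with decrypt(encrypt(x)) == x works.
    auto xor_codec = [](const std::string& input) {
        std::string output = input;
        for (auto& c : output)
            c ^= static_cast<char>(0x5A);
        return output;
    };

    ov::EncryptionCallbacks callbacks;
    callbacks.encrypt = xor_codec;
    callbacks.decrypt = xor_codec;

    core.set_property(ov::cache_dir("gpu_model_cache"));

    // Encryption only takes effect together with CacheMode::OPTIMIZE_SIZE;
    // OPTIMIZE_SPEED blobs are still written unencrypted.
    auto compiled = core.compile_model("model.xml",
                                       "GPU",
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                       ov::cache_encryption_callbacks(callbacks));
    return 0;
}

On the import side the flow mirrors this: import_model() reads the size-prefixed ciphertext that EncryptedBinaryOutputBuffer::flush() wrote, runs it through the decrypt callback, and then deserializes from the resulting plaintext stream.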