diff --git a/src/deepsparse/__init__.py b/src/deepsparse/__init__.py index 6c7d0f1cac..83fc4d9632 100644 --- a/src/deepsparse/__init__.py +++ b/src/deepsparse/__init__.py @@ -33,7 +33,6 @@ from .engine import * from .tasks import * from .pipeline import * -from .base_pipeline import * from .loggers import * from .version import __version__, is_release from .analytics import deepsparse_analytics as _analytics diff --git a/src/deepsparse/clip/captioning_pipeline.py b/src/deepsparse/clip/captioning_pipeline.py index 4e99484b6f..cc8a082c2a 100644 --- a/src/deepsparse/clip/captioning_pipeline.py +++ b/src/deepsparse/clip/captioning_pipeline.py @@ -27,7 +27,7 @@ import torch import torch.nn.functional as F from deepsparse.clip import CLIPDecoderInput, CLIPTextInput, CLIPVisualInput -from deepsparse.pipeline import BasePipeline, Pipeline +from deepsparse.legacy.pipeline import BasePipeline, Pipeline __all__ = ["CLIPCaptionInput", "CLIPCaptionOutput", "CLIPCaptionPipeline"] diff --git a/src/deepsparse/clip/decoder_pipeline.py b/src/deepsparse/clip/decoder_pipeline.py index 6bc1347012..28388b3a74 100644 --- a/src/deepsparse/clip/decoder_pipeline.py +++ b/src/deepsparse/clip/decoder_pipeline.py @@ -17,7 +17,7 @@ import numpy as np from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.utils import model_to_path diff --git a/src/deepsparse/clip/text_pipeline.py b/src/deepsparse/clip/text_pipeline.py index 1d1dbc22ad..89ed8c0f11 100644 --- a/src/deepsparse/clip/text_pipeline.py +++ b/src/deepsparse/clip/text_pipeline.py @@ -17,7 +17,7 @@ import numpy as np from pydantic import BaseModel, Field -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path from open_clip.tokenizer import tokenize diff --git a/src/deepsparse/clip/visual_pipeline.py b/src/deepsparse/clip/visual_pipeline.py index ac71c6c6e1..b827e2db64 100644 --- a/src/deepsparse/clip/visual_pipeline.py +++ b/src/deepsparse/clip/visual_pipeline.py @@ -22,7 +22,7 @@ from torchvision.transforms import InterpolationMode from deepsparse.clip.constants import CLIP_RGB_MEANS, CLIP_RGB_STDS -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.pipelines.computer_vision import ComputerVisionSchema from deepsparse.utils import model_to_path diff --git a/src/deepsparse/clip/zeroshot_pipeline.py b/src/deepsparse/clip/zeroshot_pipeline.py index 56c0df062e..fec1c7de54 100644 --- a/src/deepsparse/clip/zeroshot_pipeline.py +++ b/src/deepsparse/clip/zeroshot_pipeline.py @@ -19,7 +19,7 @@ from pydantic import BaseModel, Field from deepsparse.clip import CLIPTextInput, CLIPVisualInput -from deepsparse.pipeline import BasePipeline, Pipeline +from deepsparse.legacy.pipeline import BasePipeline, Pipeline from scipy.special import softmax diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py index 674859cace..bd557a3cf7 100644 --- a/src/deepsparse/evaluation/cli.py +++ b/src/deepsparse/evaluation/cli.py @@ -77,7 +77,11 @@ ) from src.deepsparse.evaluation.results import Result, save_result from src.deepsparse.evaluation.utils import args_to_dict, get_save_path -from src.deepsparse.pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE +from src.deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + ORT_ENGINE, + TORCHSCRIPT_ENGINE, +) _LOGGER = logging.getLogger(__name__) diff --git 
a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index d58fef7714..43948c6c51 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -19,7 +19,11 @@ ) from src.deepsparse.evaluation.registry import EvaluationRegistry from src.deepsparse.evaluation.results import Result -from src.deepsparse.pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE +from src.deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + ORT_ENGINE, + TORCHSCRIPT_ENGINE, +) __all__ = ["evaluate"] diff --git a/src/deepsparse/evaluation/utils.py b/src/deepsparse/evaluation/utils.py index 4685932084..1091b8d4e3 100644 --- a/src/deepsparse/evaluation/utils.py +++ b/src/deepsparse/evaluation/utils.py @@ -17,7 +17,8 @@ from transformers import AutoModelForCausalLM -from deepsparse import DEEPSPARSE_ENGINE, ORT_ENGINE, Pipeline +from deepsparse import Pipeline +from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE, ORT_ENGINE __all__ = ["text_generation_model_from_target", "get_save_path", "args_to_dict"] diff --git a/src/deepsparse/image_classification/__init__.py b/src/deepsparse/image_classification/__init__.py index ddb21bd1f7..4009e35e41 100644 --- a/src/deepsparse/image_classification/__init__.py +++ b/src/deepsparse/image_classification/__init__.py @@ -25,9 +25,12 @@ "Please install deepsparse[image_classification] to use this pathway" ) - from .constants import * -from .pipelines import * +from .pipeline import * + +# flake8: noqa +from .postprocess_operator import * +from .preprocess_operator import * from .schemas import * diff --git a/src/deepsparse/v2/image_classification/pipeline.py b/src/deepsparse/image_classification/pipeline.py similarity index 73% rename from src/deepsparse/v2/image_classification/pipeline.py rename to src/deepsparse/image_classification/pipeline.py index 3d7887a701..738ff980bf 100644 --- a/src/deepsparse/v2/image_classification/pipeline.py +++ b/src/deepsparse/image_classification/pipeline.py @@ -13,19 +13,19 @@ # limitations under the License. 
import logging -import warnings from typing import Dict, Optional, Tuple, Union -from deepsparse.v2.image_classification.postprocess_operator import ( +from deepsparse.image_classification.postprocess_operator import ( ImageClassificationPostProcess, ) -from deepsparse.v2.image_classification.preprocess_operator import ( +from deepsparse.image_classification.preprocess_operator import ( ImageClassificationPreProcess, ) -from deepsparse.v2.operators.engine_operator import EngineOperator -from deepsparse.v2.pipeline import Pipeline -from deepsparse.v2.routers.router import LinearRouter -from deepsparse.v2.schedulers.scheduler import OperatorScheduler +from deepsparse.operators.engine_operator import EngineOperator +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.pipeline import Pipeline +from deepsparse.routers.router import LinearRouter +from deepsparse.schedulers.scheduler import OperatorScheduler _LOGGER = logging.getLogger(__name__) @@ -33,20 +33,23 @@ __all__ = ["ImageClassificationPipeline"] +@OperatorRegistry.register(name="image_classification") class ImageClassificationPipeline(Pipeline): def __init__( self, model_path: str, - engine_kwargs: Optional[Dict] = None, class_names: Union[None, str, Dict[str, str]] = None, image_size: Optional[Tuple[int]] = None, top_k: int = 1, + **engine_kwargs, ): + if not engine_kwargs: engine_kwargs = {} engine_kwargs["model_path"] = model_path elif engine_kwargs.get("model_path") != model_path: - warnings.warn(f"Updating engine_kwargs to include {model_path}") + _LOGGER.warning(f"Updating engine_kwargs to include {model_path}") + engine_kwargs["model_path"] = model_path engine = EngineOperator(**engine_kwargs) preproces = ImageClassificationPreProcess( diff --git a/src/deepsparse/v2/image_classification/postprocess_operator.py b/src/deepsparse/image_classification/postprocess_operator.py similarity index 98% rename from src/deepsparse/v2/image_classification/postprocess_operator.py rename to src/deepsparse/image_classification/postprocess_operator.py index 9231113368..214c115e70 100644 --- a/src/deepsparse/v2/image_classification/postprocess_operator.py +++ b/src/deepsparse/image_classification/postprocess_operator.py @@ -18,7 +18,7 @@ import numpy from pydantic import BaseModel, Field -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator class ImageClassificationOutput(BaseModel): diff --git a/src/deepsparse/v2/image_classification/preprocess_operator.py b/src/deepsparse/image_classification/preprocess_operator.py similarity index 99% rename from src/deepsparse/v2/image_classification/preprocess_operator.py rename to src/deepsparse/image_classification/preprocess_operator.py index 9b4517a44c..2f26c3afaa 100644 --- a/src/deepsparse/v2/image_classification/preprocess_operator.py +++ b/src/deepsparse/image_classification/preprocess_operator.py @@ -23,8 +23,8 @@ IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS, ) +from deepsparse.operators import Operator from deepsparse.pipelines.computer_vision import ComputerVisionSchema -from deepsparse.v2.operators import Operator class ImageClassificationInput(ComputerVisionSchema): diff --git a/src/deepsparse/image_classification/validation_script.py b/src/deepsparse/image_classification/validation_script.py index 4a0a884084..9cd4e14c30 100644 --- a/src/deepsparse/image_classification/validation_script.py +++ b/src/deepsparse/image_classification/validation_script.py @@ -27,13 +27,13 @@ on Imagenette [default: zoo:cv/classificati 
on/resnet_v1-50/pytorch/sparseml/imagenette/ base-none] + --image-size, --image_size INTEGER + integer size to evaluate images at (will be + reshaped to square shape) [default: 224] --batch-size, --batch_size INTEGER Test batch size, must divide the dataset evenly, else last batch will be dropped [default: 1] - --image-size, --image_size INTEGER - integer size to evaluate images at (will be - reshaped to square shape) [default: 224] --num-cores, --num_cores INTEGER Number of CPU cores to run deepsparse with, default is all available @@ -213,11 +213,10 @@ def main( pipeline = Pipeline.create( task="image_classification", model_path=model_path, + engine_type=engine, batch_size=batch_size, num_cores=num_cores, - engine_type=engine, ) - print(f"engine info: {pipeline.engine}") correct = total = 0 progress_bar = tqdm(data_loader) diff --git a/src/deepsparse/v2/utils/__init__.py b/src/deepsparse/legacy/__init__.py similarity index 86% rename from src/deepsparse/v2/utils/__init__.py rename to src/deepsparse/legacy/__init__.py index 75935a9729..0e53b4e85d 100644 --- a/src/deepsparse/v2/utils/__init__.py +++ b/src/deepsparse/legacy/__init__.py @@ -1,5 +1,3 @@ -# flake8: noqa - # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import * -from .state import * -from .types import * +# flake8: noqa -from .data import * # isort:skip +from .base_pipeline import * +from .pipeline import * +from .tasks import * diff --git a/src/deepsparse/base_pipeline.py b/src/deepsparse/legacy/base_pipeline.py similarity index 98% rename from src/deepsparse/base_pipeline.py rename to src/deepsparse/legacy/base_pipeline.py index 156ea38656..c5d006fc80 100644 --- a/src/deepsparse/base_pipeline.py +++ b/src/deepsparse/legacy/base_pipeline.py @@ -19,10 +19,10 @@ from pydantic import BaseModel from deepsparse import Context +from deepsparse.legacy.tasks import SupportedTasks, dynamic_import_task from deepsparse.loggers.base_logger import BaseLogger from deepsparse.loggers.build_logger import logger_from_config from deepsparse.loggers.constants import validate_identifier -from deepsparse.tasks import SupportedTasks, dynamic_import_task __all__ = [ @@ -166,7 +166,7 @@ def create( implementation :return: pipeline object initialized for the given task """ - from deepsparse.pipeline import Bucketable, BucketingPipeline, Pipeline + from deepsparse.legacy.pipeline import Bucketable, BucketingPipeline, Pipeline pipeline_constructor = BasePipeline._get_task_constructor(task) model_path = kwargs.get("model_path", None) @@ -278,7 +278,7 @@ def from_config( logging. 
Default is None :return: loaded Pipeline object from the config """ - from deepsparse.pipeline import PipelineConfig + from deepsparse.legacy.pipeline import PipelineConfig if isinstance(config, Path) or ( isinstance(config, str) and os.path.exists(config) @@ -308,7 +308,7 @@ def to_config(self) -> "PipelineConfig": # noqa: F821 """ :return: PipelineConfig that can be used to reload this object """ - from deepsparse.pipeline import PipelineConfig + from deepsparse.legacy.pipeline import PipelineConfig if not hasattr(self, "task"): raise RuntimeError( diff --git a/src/deepsparse/v2/image_classification/__init__.py b/src/deepsparse/legacy/image_classification/__init__.py similarity index 85% rename from src/deepsparse/v2/image_classification/__init__.py rename to src/deepsparse/legacy/image_classification/__init__.py index 8668227df7..10a3971bf8 100644 --- a/src/deepsparse/v2/image_classification/__init__.py +++ b/src/deepsparse/legacy/image_classification/__init__.py @@ -13,8 +13,5 @@ # limitations under the License. # flake8: noqa -from .postprocess_operator import * -from .preprocess_operator import * - -from .pipeline import * # isort:skip +from .pipelines import * diff --git a/src/deepsparse/image_classification/pipelines.py b/src/deepsparse/legacy/image_classification/pipelines.py similarity index 99% rename from src/deepsparse/image_classification/pipelines.py rename to src/deepsparse/legacy/image_classification/pipelines.py index d55a5d138d..dd6bd7bb86 100644 --- a/src/deepsparse/image_classification/pipelines.py +++ b/src/deepsparse/legacy/image_classification/pipelines.py @@ -31,7 +31,7 @@ ImageClassificationInput, ImageClassificationOutput, ) -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path diff --git a/src/deepsparse/legacy/pipeline.py b/src/deepsparse/legacy/pipeline.py new file mode 100644 index 0000000000..7f38587707 --- /dev/null +++ b/src/deepsparse/legacy/pipeline.py @@ -0,0 +1,1348 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Classes and registry for end to end inference pipelines that wrap an underlying +inference engine and include pre/postprocessing +""" +import os +from abc import ABC, abstractmethod +from concurrent.futures import ThreadPoolExecutor +from functools import partial +from pathlib import Path +from typing import Any, Dict, Generator, List, Optional, Tuple, Type, Union + +import numpy +from pydantic import BaseModel, Field + +from deepsparse import Context, Engine, MultiModelEngine, Scheduler +from deepsparse.benchmark import ORTEngine, TorchScriptEngine +from deepsparse.cpu import cpu_details +from deepsparse.legacy.base_pipeline import ( + _REGISTERED_PIPELINES, + BasePipeline, + SupportedTasks, +) +from deepsparse.loggers.base_logger import BaseLogger +from deepsparse.loggers.constants import MetricCategories, SystemGroups +from deepsparse.utils import ( + InferenceStages, + StagedTimer, + TimerManager, + join_engine_outputs, + split_engine_inputs, +) + + +__all__ = [ + "DEEPSPARSE_ENGINE", + "ORT_ENGINE", + "TORCHSCRIPT_ENGINE", + "SUPPORTED_PIPELINE_ENGINES", + "Pipeline", + "BasePipeline", + "SupportedTasks", + "_REGISTERED_PIPELINES", + "PipelineConfig", + "question_answering_pipeline", + "text_classification_pipeline", + "zero_shot_text_classification_pipeline", + "token_classification_pipeline", + "image_classification_pipeline", + "yolo_pipeline", + "Bucketable", + "BucketingPipeline", + "create_engine", + "TextGeneration", + "CodeGeneration", + "Chat", +] + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" +TORCHSCRIPT_ENGINE = "torchscript" + +SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] + + +class Pipeline(BasePipeline): + """ + Generic Pipeline abstract class meant to wrap inference engine objects to include + data pre/post-processing. Inputs and outputs of pipelines should be serialized + as pydantic Models. See the BasePipeline above for additional parameters provided + during inference. + + Pipelines should not be instantiated by their constructors, but rather the + `Pipeline.create()` method. The task name given to `create` will be used to + load the appropriate pipeline. When creating a Pipeline, the pipeline should + inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, + `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. + + Finally, the class definition should be decorated by the `Pipeline.register` + function. This defines the task name and task aliases for the pipeline and + ensures that it will be accessible by `Pipeline.create`. The implemented + `Pipeline` subclass must be imported at runtime to be accessible. 
+ + Pipeline lifecycle: + - On instantiation + * `onnx_file_path` <- `setup_onnx_file_path` + * `engine` <- `_initialize_engine` + + - on __call__: + * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` + * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` + * `engine_outputs` <- `engine(pre_processed_inputs)` + * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` + + Example use of register: + ```python + @Pipeline.register( + task="example_task", + task_aliases=["example_alias_1", "example_alias_2"], + ) + class PipelineImplementation(Pipeline): + # implementation of Pipeline abstract methods here + ``` + + Example use of pipeline: + ```python + example_pipeline = Pipeline.create( + task="example_task", + model_path="model.onnx", + ) + pipeline_outputs = example_pipeline(pipeline_inputs) + ``` + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. None represents + dynamic batch mode (Pipeline will accept any batch size). Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param num_streams: The max number of requests the model can handle + concurrently. None or 0 implies a scheduler-defined default value; + default None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param context: Optional Context object to use for creating instances of + MultiModelEngine. The Context contains a shared scheduler along with + other runtime information that will be used across instances of the + MultiModelEngine to provide optimal performance when running multiple + models concurrently + :param executor: An optional ThreadPoolExecutor() object, if provided the + pipeline executes inference requests in a non-blocking manner and returns + a Future object, call Future.result() on returned object to get the result. 
+ Can also accept an int number of workers, a ThreadPoolExecutor object is + auto-initialized with the specified integer in that case; None represents + synchronous execution - if running in dynamic batch mode a default + ThreadPoolExecutor with default workers equal to the number of available + cores / 2 + """ + + def __init__( + self, + model_path: str, + engine_type: str = DEEPSPARSE_ENGINE, + batch_size: Optional[int] = 1, + num_cores: int = None, + num_streams: int = None, + scheduler: Scheduler = None, + input_shapes: List[List[int]] = None, + context: Optional[Context] = None, + executor: Optional[Union[ThreadPoolExecutor, int]] = None, + benchmark: bool = False, + _delay_engine_initialize: bool = False, # internal use only + **kwargs, + ): + self._benchmark = benchmark + self._model_path_orig = model_path + self._model_path = model_path + self._engine_type = engine_type + self._batch_size = batch_size + self._timer_manager = TimerManager(enabled=True, multi=benchmark) + self.context = context + super().__init__(**kwargs) + + self.executor, self._num_async_workers = _initialize_executor_and_workers( + batch_size=batch_size, + workers_or_executor=executor, + ) + + if self.context is not None: + num_cores = num_cores or self.context.num_cores + if self.context.num_cores != num_cores: + raise ValueError( + f"num_cores mismatch. Expected {self.context.num_cores} " + f"from passed context, but got {num_cores} while " + f"instantiating Pipeline" + ) + + self._engine_args = dict( + batch_size=self._batch_size or 1, # bs=1 for dynamic batch + num_cores=num_cores, + input_shapes=input_shapes, + ) + if engine_type.lower() == DEEPSPARSE_ENGINE: + self._engine_args["scheduler"] = scheduler + self._engine_args["num_streams"] = num_streams + + self.onnx_file_path = self.setup_onnx_file_path() + + if _delay_engine_initialize: + self.engine = None + else: + self.engine = self._initialize_engine() + self._batch_size = self._batch_size or 1 + + self.log( + identifier=f"{SystemGroups.INFERENCE_DETAILS}/num_cores_total", + value=num_cores, + category=MetricCategories.SYSTEM, + ) + + def __call__(self, *args, **kwargs) -> BaseModel: + with self.timer_manager.new_timer_context() as timer: + if "engine_inputs" in kwargs: + raise ValueError( + "invalid kwarg engine_inputs. engine inputs determined " + f"by {self.__class__.__qualname__}.parse_inputs" + ) + + # ------ PREPROCESSING ------ + timer.start(InferenceStages.PRE_PROCESS) + # parse inputs into input_schema + pipeline_inputs = self.parse_inputs(*args, **kwargs) + self.log( + identifier="pipeline_inputs", + value=pipeline_inputs, + category=MetricCategories.DATA, + ) + + if not isinstance(pipeline_inputs, self.input_schema): + raise RuntimeError( + f"Unable to parse {self.__class__} inputs into a " + f"{self.input_schema} object. 
" + f"Inputs parsed to {type(pipeline_inputs)}" + ) + # batch size of the inputs may be `> self._batch_size` at this point + engine_inputs = self.process_inputs(pipeline_inputs) + if isinstance(engine_inputs, tuple): + engine_inputs, context = engine_inputs + else: + context = {} + + timer.stop(InferenceStages.PRE_PROCESS) + self.log( + identifier="engine_inputs", + value=engine_inputs, + category=MetricCategories.DATA, + ) + + # ------ INFERENCE ------ + # split inputs into batches of size `self._batch_size` + timer.start(InferenceStages.ENGINE_FORWARD) + batches, orig_batch_size = self.split_engine_inputs( + engine_inputs, self._batch_size + ) + + # submit split batches to engine threadpool + engine_forward_with_context = partial(self.engine_forward, context=context) + batch_outputs = list( + self.executor.map(engine_forward_with_context, batches) + ) + + # join together the batches of size `self._batch_size` + engine_outputs = self.join_engine_outputs( + batch_outputs, orig_batch_size, **context + ) + timer.stop(InferenceStages.ENGINE_FORWARD) + + self.log( + identifier=f"{SystemGroups.INFERENCE_DETAILS}/input_batch_size_total", + # to get the batch size of the inputs, we need to look + # to multiply the engine batch size (self._batch_size) + # by the number of batches processed by the engine during + # a single inference call + value=len(batch_outputs) * self._batch_size, + category=MetricCategories.SYSTEM, + ) + self.log( + identifier="engine_outputs", + value=engine_outputs, + category=MetricCategories.DATA, + ) + + # ------ POSTPROCESSING ------ + timer.start(InferenceStages.POST_PROCESS) + pipeline_outputs = self.process_engine_outputs(engine_outputs, **context) + if not isinstance(pipeline_outputs, (self.output_schema, Generator)): + raise ValueError( + f"Outputs of {self.__class__} must be instances of " + f"{self.output_schema} found output of type " + f"{type(pipeline_outputs)}" + ) + timer.stop(InferenceStages.POST_PROCESS) + self.log( + identifier="pipeline_outputs", + value=pipeline_outputs, + category=MetricCategories.DATA, + ) + + self.log_inference_times(timer) + + return pipeline_outputs + + @classmethod + def from_config( + cls, + config: Union["PipelineConfig", str, Path], + context: Optional[Context] = None, + logger: Optional[BaseLogger] = None, + ) -> "Pipeline": + """ + :param config: PipelineConfig object, filepath to a json serialized + PipelineConfig, or raw string of a json serialized PipelineConfig + :param context: Optional Context object to use for creating instances of + MultiModelEngine. The Context contains a shared scheduler along with + other runtime information that will be used across instances of the + MultiModelEngine to provide optimal performance when running + multiple models concurrently + :param logger: An optional DeepSparse Logger object for inference + logging. 
Default is None + :return: loaded Pipeline object from the config + """ + if isinstance(config, Path) or ( + isinstance(config, str) and os.path.exists(config) + ): + if isinstance(config, str): + config = Path(config) + config = PipelineConfig.parse_file(config) + if isinstance(config, str): + config = PipelineConfig.parse_raw(config) + + return cls.create( + task=config.task, + model_path=config.model_path, + engine_type=config.engine_type, + batch_size=config.batch_size, + num_cores=config.num_cores, + scheduler=config.scheduler, + input_shapes=config.input_shapes, + alias=config.alias, + context=context, + logger=logger, + **config.kwargs, + ) + + @abstractmethod + def setup_onnx_file_path(self) -> str: + """ + Performs any setup to unwrap and process the given `model_path` and other + class properties into an inference ready onnx file to be compiled by the + engine of the pipeline + + :return: file path to the ONNX file for the engine to compile + """ + raise NotImplementedError() + + @abstractmethod + def process_inputs( + self, + inputs: BaseModel, + ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: + """ + :param inputs: inputs to the pipeline. Must be the type of the `input_schema` + of this pipeline + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine. Can + also include a tuple with engine inputs and special key word arguments + to pass to process_engine_outputs to facilitate information from the raw + inputs to postprocessing that may not be included in the engine inputs + """ + raise NotImplementedError() + + @abstractmethod + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + **kwargs, + ) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + raise NotImplementedError() + + @property + def model_path_orig(self) -> str: + """ + :return: value originally passed to the `model_path` argument to initialize + this Pipeline + """ + return self._model_path_orig + + @property + def model_path(self) -> str: + """ + :return: path on local system to the onnx file of this model or directory + containing a model.onnx file along with supporting files + """ + return self._model_path + + @property + def engine_args(self) -> Dict[str, Any]: + """ + :return: arguments besides onnx filepath used to instantiate engine + """ + return self._engine_args + + @property + def engine_type(self) -> str: + """ + :return: type of inference engine used for model forward pass + """ + return self._engine_type + + @property + def timer_manager(self) -> TimerManager: + return self._timer_manager + + @property + def current_timer(self) -> Optional[StagedTimer]: + """ + :return: current timer for the pipeline, if any + """ + timer = self.timer_manager.current + + if timer is None: + timer = self.timer_manager.latest + + return timer + + @property + def benchmark(self) -> bool: + return self._benchmark + + @benchmark.setter + def benchmark(self, value: bool): + self._benchmark = value + self.timer_manager.multi = value + + def to_config(self) -> "PipelineConfig": + """ + :return: PipelineConfig that can be used to reload this object + """ + + if not hasattr(self, "task"): + raise RuntimeError( + f"{self.__class__} instance has no attribute task. 
Pipeline objects " + "must have a task to be serialized to a config. Pipeline objects " + "must be declared with the Pipeline.register object to be assigned a " + "task" + ) + + # parse any additional properties as kwargs + kwargs = {} + for attr_name, attr in self.__class__.__dict__.items(): + if isinstance(attr, property) and attr_name not in dir(PipelineConfig): + kwargs[attr_name] = getattr(self, attr_name) + + return PipelineConfig( + task=self.task, + model_path=self.model_path_orig, + engine_type=self.engine_type, + batch_size=self._batch_size, + num_cores=self._engine_args.get("num_cores"), + scheduler=self._engine_args.get("scheduler"), + input_shapes=self._engine_args.get("input_shapes"), + alias=self.alias, + kwargs=kwargs, + ) + + def join_engine_outputs( + self, batch_outputs: List[List[numpy.ndarray]], orig_batch_size: int, **kwargs + ) -> List[numpy.ndarray]: + """ + Joins list of engine outputs together into one list. + This is the opposite of `split_engine_inputs` and is meant to be used in tandem. + + :param batch_outputs: list of engine outputs + :param orig_batch_size: original batch size of the inputs + :return: list of engine outputs joined together + """ + return join_engine_outputs(batch_outputs, orig_batch_size) + + def split_engine_inputs( + self, items: List[numpy.ndarray], batch_size: int + ) -> List[List[numpy.ndarray]]: + """ + Splits each item into numpy arrays with the first dimension == `batch_size`. + This is the opposite of `join_engine_outputs` and is meant to be used in tandem. + + :param items: size of each batch to split into + :param batch_size: size of each batch to enforce + + :return: list of batches, where each batch is a list of numpy arrays + """ + return split_engine_inputs(items, batch_size) + + def engine_forward( + self, + engine_inputs: List[numpy.ndarray], + context: Dict = {}, + ) -> List[numpy.ndarray]: + """ + :param engine_inputs: list of numpy inputs to Pipeline engine forward + pass + :param context: optional dictionary to be used during engine execution + :return: result of forward pass to Pipeline engine + """ + return self.engine(engine_inputs) + + def log_inference_times(self, timer: StagedTimer): + """ + logs stage times in the given timer + + :param timer: timer to log + """ + for stage, time in timer.times.items(): + self.log( + identifier=f"{SystemGroups.PREDICTION_LATENCY}/{stage}_seconds", + value=time, + category=MetricCategories.SYSTEM, + ) + + def _initialize_engine( + self, + ) -> Union[Engine, MultiModelEngine, ORTEngine, TorchScriptEngine]: + return create_engine( + self.onnx_file_path, self.engine_type, self._engine_args, self.context + ) + + def _properties_dict(self) -> Dict: + return { + "config": self.to_config(), + "engine": self.engine, + } + + def __repr__(self): + """ + :return: Unambiguous representation of the current pipeline + """ + return "{}({})".format(self.__class__, self._properties_dict()) + + def __str__(self): + """ + :return: Human readable form of the current pipeline + """ + formatted_props = [ + "\t{}: {}".format(key, val) for key, val in self._properties_dict().items() + ] + + return "{}.{}:\n{}".format( + self.__class__.__module__, + self.__class__.__qualname__, + "\n".join(formatted_props), + ) + + +class PipelineConfig(BaseModel): + """ + Configuration for creating a Pipeline object + + Can be used to create a Pipeline from a config object or file with + Pipeline.from_config(), or used as a building block for other configs + such as for deepsparse.server + """ + + task: str = Field( + 
description="name of task to create a pipeline for", + ) + model_path: str = Field( + default=None, + description="path on local system or SparseZoo stub to load the model from", + ) + engine_type: str = Field( + default=DEEPSPARSE_ENGINE, + description=( + "inference engine to use. Currently supported values include " + "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" + ), + ) + batch_size: Optional[int] = Field( + default=1, + description=("static batch size to use for inference. Default is 1"), + ) + num_cores: int = Field( + default=None, + description=( + "number of CPU cores to allocate for inference engine. None" + "specifies all available cores. Default is None" + ), + ) + scheduler: Optional[str] = Field( + default="async", + description=( + "(deepsparse only) kind of scheduler to execute with. Defaults to async" + ), + ) + input_shapes: List[List[int]] = Field( + default=None, + description=( + "list of shapes to set ONNX the inputs to. Pass None to use model as-is. " + "Default is None" + ), + ) + alias: str = Field( + default=None, + description=( + "optional name to give this pipeline instance, useful when inferencing " + "with multiple models. Default is None" + ), + ) + kwargs: Dict[str, Any] = Field( + default={}, + description=( + "Additional arguments for inference with the model that will be passed " + "into the pipeline as kwargs" + ), + ) + + +class BucketingPipeline(object): + """ + A Proxy class that adds Bucketing functionality to Pipelines + + :param pipelines: A list of Pipeline objects/buckets that implement + `Bucketable` contract + """ + + def __init__(self, pipelines: List[Pipeline]): + if not (pipelines and isinstance(pipelines, list)): + raise ValueError( + "Expected a non empty List of pipeline objects but got " f"{pipelines}" + ) + self._pipelines = pipelines + self._pipeline_class = pipelines[0].__class__ + self._validate_pipeline_class() + + def __call__(self, *args, **kwargs): + bucket, parsed_inputs = self._choose_bucket(*args, **kwargs) + return bucket(parsed_inputs) + + def _choose_bucket(self, *args, **kwargs): + parsed_inputs = self._pipelines[-1].parse_inputs(*args, **kwargs) + bucket = self._pipeline_class.route_input_to_bucket( + input_schema=parsed_inputs, + pipelines=self._pipelines, + ) + return bucket, parsed_inputs + + def __getattr__(self, item): + value = getattr(self._pipelines[0].__class__, item) + + if isinstance(value, property): + return getattr(self._pipelines[0], item) + + raise AttributeError( + f"{item} not found in {self.__class__.__name__}, " + f"and is not a property of {self._pipeline_class.__name__}" + ) + + @property + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return self._pipelines[0].input_schema + + @property + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return self._pipelines[0].output_schema + + def _validate_pipeline_class(self): + # validate all pipelines belong to the same class + + if not issubclass(self._pipeline_class, Bucketable): + raise ValueError(f"{self._pipeline_class} is not Bucketable") + + is_valid = all( + isinstance(pipeline, self._pipeline_class) for pipeline in self._pipelines + ) + + if not is_valid: + raise ValueError( + "All Pipeline Buckets must belong to the same Pipeline Class" + ) + + +class Bucketable(ABC): + """ + A contract, that ensures implementing Pipeline class can create multiple Pipeline + 
instances and route each input sample to correct instance based off of specific + implementations of abstract methods defined in this contract + """ + + @staticmethod + @abstractmethod + def should_bucket(*args, **kwargs) -> bool: + """ + :returns: True if buckets should be created else False + """ + pass + + @staticmethod + @abstractmethod + def create_pipeline_buckets(*args, **kwargs) -> List[Pipeline]: + """ + :return: Create and return a list of Pipeline objects + representing different buckets + """ + pass + + @staticmethod + @abstractmethod + def route_input_to_bucket( + *args, input_schema: BaseModel, pipelines: List[Pipeline], **kwargs + ) -> Pipeline: + """ + :param input_schema: The schema representing an input to the pipeline + :param pipelines: Different buckets to be used + :return: The correct Pipeline object (or Bucket) to route input to + """ + pass + + +def create_engine( + onnx_file_path: str, + engine_type: str, + engine_args: Dict, + context: Optional[Context] = None, +) -> Union[Engine, MultiModelEngine, ORTEngine]: + """ + Create an inference engine for a given ONNX model + + :param onnx_file_path: path to ONNX model file + :param engine_type: type of engine to create. + :param engine_args: arguments to pass to engine constructor + :param context: context to use for engine + :return: inference engine + """ + engine_type = engine_type.lower() + + if engine_type == DEEPSPARSE_ENGINE: + if context is not None and isinstance(context, Context): + engine_args.pop("num_cores", None) + engine_args.pop("scheduler", None) + engine_args.pop("num_streams", None) + engine_args["context"] = context + return MultiModelEngine( + model=onnx_file_path, + **engine_args, + ) + engine_args.pop("cache_output_bools", None) + return Engine(onnx_file_path, **engine_args) + + if engine_type == ORT_ENGINE: + return ORTEngine(onnx_file_path, **engine_args) + + if engine_type == TORCHSCRIPT_ENGINE: + return TorchScriptEngine(onnx_file_path, **engine_args) + + raise ValueError( + f"Unknown engine_type {engine_type}. 
Supported values include: " + f"{SUPPORTED_PIPELINE_ENGINES}" + ) + + +def _initialize_executor_and_workers( + batch_size: Optional[int], + workers_or_executor: Optional[Union[int, ThreadPoolExecutor]], +) -> Tuple[Optional[ThreadPoolExecutor], int]: + if isinstance(workers_or_executor, ThreadPoolExecutor): + num_async_workers = workers_or_executor._max_workers # noqa + executor = workers_or_executor + elif isinstance(workers_or_executor, int): + num_async_workers = max(1, workers_or_executor) + executor = ThreadPoolExecutor(max_workers=num_async_workers) + elif batch_size is None and workers_or_executor is None: + # default num workers to num available cores / 2 + num_cpu_cores_available = cpu_details()[0] + num_async_workers = max(1, num_cpu_cores_available // 2) + executor = ThreadPoolExecutor(max_workers=num_async_workers) + elif workers_or_executor is not None: + raise ValueError( + "Expected an int or ThreadPoolExecutor to run in async mode" + f" but got {workers_or_executor} of type {type(workers_or_executor)}" + ) + else: + executor = ThreadPoolExecutor(max_workers=1) + num_async_workers = 1 + + if batch_size is None and executor is None: + raise ValueError( + "Must have a ThreadPoolExecutor for running in dynamic batch mode " + f"but got {None}" + ) + + return executor, num_async_workers + + +def text_generation_pipeline( + *args, model: Optional[str] = None, **kwargs +) -> "Pipeline": + """ + :return: text generation pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("text_generation", *args, **kwargs) + + +def code_generation_pipeline( + *args, model: Optional[str] = None, **kwargs +) -> "Pipeline": + """ + :return: code generation pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("code_generation", *args, **kwargs) + + +def chat_pipeline(*args, model: Optional[str] = None, **kwargs) -> "Pipeline": + """ + :return: chat pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("chat", *args, **kwargs) + + +def _parse_model_arg(model: Optional[str], **kwargs) -> dict: + if model is not None: + model_path = kwargs.get("model_path") + if model_path is not None: + raise ValueError( + f"Only one of model and model_path may be supplied, found {model} " + f"and {model_path} respectively" + ) + kwargs["model_path"] = model + return kwargs + + +# aliases for top level import +TextGeneration = text_generation_pipeline +CodeGeneration = code_generation_pipeline +Chat = chat_pipeline + + +def question_answering_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers question_answering pipeline + + example instantiation: + ```python + question_answering = Pipeline.create( + task="question_answering", + model_path="question_answering_model_dir/", + ) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores.
Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param doc_stride: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap. Currently, only reading the first span + is supported (everything after doc_stride will be truncated). Default + is 128 + :param max_question_len: maximum length of the question after tokenization. + It will be truncated if needed. Default is 64 + :param max_answer_len: maximum length of answer after decoding. Default is 15 + """ + return Pipeline.create("question_answering", *args, **kwargs) + + +def text_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers text classification pipeline + + example instantiation: + ```python + text_classifier = Pipeline.create( + task="text_classification", + model_path="text_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + example batch size 1, single text inputs (ie sentiment analysis): + ```python + sentiment = text_classifier("the food tastes great") + sentiment = text_classifier(["the food tastes great"]) + sentiment = text_classifier([["the food tastes great"]]) + ``` + + example batch size 1, multi text input (ie QQP like tasks): + ```python + prediction = text_classifier([["how is the food?", "what is the food?"]]) + ``` + + example batch size n, single text inputs: + ```python + sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) + sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param return_all_scores: if True, instead of returning the prediction as the + argmax of model class predictions, will return all scores and labels as + a list for each result in the batch. 
Default is False + """ + return Pipeline.create("text_classification", *args, **kwargs) + + +def sentiment_analysis_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers text classification pipeline + + example instantiation: + ```python + text_classifier = Pipeline.create( + task="text_classification", + model_path="text_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + example batch size 1, single text inputs (ie sentiment analysis): + ```python + sentiment = text_classifier("the food tastes great") + sentiment = text_classifier(["the food tastes great"]) + sentiment = text_classifier([["the food tastes great"]]) + ``` + + example batch size 1, multi text input (ie QQP like tasks): + ```python + prediction = text_classifier([["how is the food?", "what is the food?"]]) + ``` + + example batch size n, single text inputs: + ```python + sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) + sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param return_all_scores: if True, instead of returning the prediction as the + argmax of model class predictions, will return all scores and labels as + a list for each result in the batch. Default is False + """ + return Pipeline.create("text_classification", *args, **kwargs) + + +def token_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers token classification pipeline + + example instantiation: + ```python + token_classifier = Pipeline.create( + task="token_classification", + model_path="token_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. 
Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param aggregation_strategy: how to aggregate tokens in postprocessing. Options + include 'none', 'simple', 'first', 'average', and 'max'. Default is None + :param ignore_labels: list of label names to ignore in output. Default is + ['0'] which ignores the default known class label + """ + return Pipeline.create("token_classification", *args, **kwargs) + + +def image_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + Image classification pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional dict, or json file of class names to use for + mapping class ids to class labels. Default is None + """ + return Pipeline.create("image_classification", *args, **kwargs) + + +def yolo_pipeline(*args, **kwargs) -> "Pipeline": + """ + Image Segmentation YOLO pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values + include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional string identifier, dict, or json file of + class names to use for mapping class ids to class labels. Default is + `coco` + """ + return Pipeline.create("yolo", *args, **kwargs) + + +def haystack_pipeline(*args, **kwargs) -> "Pipeline": + """ + Neural Magic pipeline for running Haystack DocumentSearchPipeline. 
+ Supports selected Haystack Nodes as well as Haystack nodes integrated + with the Neural Magic DeepSparse Engine + + example embedding model instantiation: + ```python + haystack_pipeline = Pipeline.create( + task="information_retrieval_haystack", + model_path="masked_language_modeling_model_dir/", + config={ + "document_store": "InMemoryDocumentStore", + "document_store_args": { + "similarity": "cosine", + "use_gpu": False, + }, + "retriever": "DeepSparseEmbeddingRetriever", + "retriever_args": { + "extraction_strategy": "reduce_mean" + } + }, + ) + ``` + + example deepsparse biencoder instantiation + ```python + haystack_pipeline = Pipeline.create( + task="information_retrieval_haystack", + config={ + "document_store": "InMemoryDocumentStore", + "document_store_args": { + "similarity": "cosine", + "use_gpu": False, + }, + "retriever": "DeepSparseDensePassageRetriever", + "retriever_args": { + "query_model_path": "./query_model", + "passage_model_path": "./passage_model" + } + }, + ) + ``` + + writing documents: + ```python + haystack_pipeline.write_documents([ + { + "title": "Claude Shannon", + "content": "Claude Elwood Shannon was an American mathematician, " + "electrical engineer, and cryptographer known as a father of " + "information theory. He was a 21-year-old master's degree student at " + "the Massachusetts Institute of Technology (MIT)." + }, + { + "title": "Vincent van Gogh", + "content": "Van Gogh was born into an upper-middle-class family. " + "As a child he was serious, quiet and thoughtful. He began drawing " + "at an early age and as a young man worked as an art dealer." + }, + { + "title": "Stevie Wonder", + "content": "Stevland Hardaway Morris, known professionally as " + "Stevie Wonder, is an American singer and musician, who is " + "credited as a pioneer and influence by musicians across a range " + "of genres." + } + ]) + ``` + + example queries: + ```python + from deepsparse.transformers.haystack import print_pipeline_documents + pipeline_outputs = haystack_pipeline( + queries="who invented information theory", + params={"Retriever": {"top_k": 4}} + ) + print_pipeline_documents(pipeline_outputs) + + pipeline_outputs = haystack_pipeline( + queries=[ + "famous artists", + "What is Stevie Wonder's real name?" + ], + params={"Retriever": {"top_k": 4}} + ) + print_pipeline_documents(pipeline_outputs) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param docs: list of documents to be written to document_store. 
Can also + be written after instantiation with write_documents method. + Default is None + :param config: dictionary or instance of HaystackPipelineConfig. Used to + specify Haystack node arguments + :param retriever_kwargs: keyword arguments to be passed to retriever. If + the retriever is a deepsparse retriever, then these arguments will also + be passed to the TransformersEmbeddingExtractionPipeline of the retriever + """ + return Pipeline.create("information_retrieval_haystack", *args, **kwargs) + + +def embedding_extraction_pipeline(*args, **kwargs) -> "Pipeline": + """ + embedding extraction pipeline for extracting intermediate layer embeddings + from transformer models + + example instantiation: + ```python + embedding_extraction_pipeline = Pipeline.create( + task="embedding_extraction", + model_path="masked_language_modeling_model_dir/", + ) + results = embedding_extraction_pipeline( + [ + "the warriors have won the nba finals" + "the warriors are the greatest basketball team ever" + ] + ) + emb_1, emb_2 = results.embeddings + # (expect emb_1 and emb_2 to have high cosine similiarity) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param emb_extraction_layer: if an int, the transformer layer number from + which the embeddings will be extracted. If a string, the name of last + ONNX node in model to draw embeddings from. If None, leave the model + unchanged. Default is -1 (last transformer layer before prediction head) + :param model_size: size of transformer model (size of hidden layer per token + if the model is cut). Default is 768 + :param extraction_strategy: method of pooling embedding values. Currently + supported values are 'per_token', 'reduce_mean', 'reduce_max' and 'cls_token'. + Default is 'per_token' + :param return_numpy: return embeddings a list of numpy arrays, list of lists + of floats otherwise. Default is True + :param context: context for engine. If None, then the engine will be initialized + with 2 streams to make use of parallel inference of labels. Default is None + """ + return Pipeline.create("embedding_extraction", *args, **kwargs) + + +def zero_shot_text_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + Transformers zero shot text classification pipeline. This pipeline allows for + text classification using models which were trained on datasets not originally + meant for this task. 
+ + This class upon construction returns an instance of a child Pipeline which + inherits from ZeroShotTextClassificationPipelineBase. Which type of Pipeline + is returned depends on the value of the passed model_scheme argument. + + example dynamic labels: + ```python + zero_shot_text_classifier = Pipeline.create( + task="zero_shot_text_classification", + model_scheme="mnli", + model_config={"hypothesis_template": "This text is related to {}"}, + model_path="mnli_model_dir/", + ) + + sequence_to_classify = "Who are you voting for in 2020?" + candidate_labels = ["Europe", "public health", "politics"] + zero_shot_text_classifier(sequences=sequence_to_classify, labels=candidate_labels) + >>> ZeroShotTextClassificationOutput( + sequences='Who are you voting for in 2020?', + labels=['politics', 'public health', 'Europe'], + scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) + ``` + + example static labels: + ```python + zero_shot_text_classifier = Pipeline.create( + task="zero_shot_text_classification", + model_scheme="mnli", + model_config={"hypothesis_template": "This text is related to {}"}, + model_path="mnli_model_dir/", + labels=["politics", "Europe", "public health"] + ) + + sequence_to_classify = "Who are you voting for in 2020?" + zero_shot_text_classifier(sequences=sequence_to_classify) + >>> ZeroShotTextClassificationOutput( + sequences='Who are you voting for in 2020?', + labels=['politics', 'public health', 'Europe'], + scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) + ``` + + Note that labels must either be provided during pipeline instantiation via + the constructor, at inference time, but not both. + + Note that if a hypothesis_template is provided at inference time, then it + will override the value provided during model instantiation + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: batch size must divide sequences * labels, regardless of + whether using dynamic or static labels. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is "bert-base-uncased" + :param model_scheme: training scheme used to train the model used for zero shot. + Default is "mnli" + :param model_config: config object specific to the model_scheme of this model + or a dict of config keyword arguments + :param labels: static list of labels to perform text classification with. Can + also be provided at inference time + :param context: context for engine. 
If None, then the engine will be initialized + with 2 streams to make use of parallel inference of labels + """ + return Pipeline.create("zero_shot_text_classification", *args, **kwargs) diff --git a/src/deepsparse/legacy/tasks.py b/src/deepsparse/legacy/tasks.py new file mode 100644 index 0000000000..6b23c7d072 --- /dev/null +++ b/src/deepsparse/legacy/tasks.py @@ -0,0 +1,428 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Classes and implementations for supported tasks in the DeepSparse pipeline and system +""" + +import importlib +import logging +import os +import sys +from collections import namedtuple +from typing import Iterable, List, Optional, Tuple + + +_LOGGER = logging.getLogger(__name__) + +__all__ = ["SupportedTasks", "AliasedTask"] + + +class AliasedTask: + """ + A task that can have multiple aliases to match to. + For example, question_answering which can alias to qa as well + + :param name: the name of the task such as question_answering or text_classification + :param aliases: the aliases the task can go by in addition to the name such as + qa, glue, sentiment_analysis, etc + """ + + def __init__(self, name: str, aliases: List[str]): + self._name = name + self._aliases = aliases + + @property + def name(self) -> str: + """ + :return: the name of the task such as question_answering + """ + return self._name + + @property + def aliases(self) -> List[str]: + """ + :return: the aliases the task can go by such as qa, glue, sentiment_analysis + """ + return self._aliases + + def matches(self, task: str) -> bool: + """ + :param task: the name of the task to check whether the given instance matches. + Checks the current name as well as any aliases. + Everything is compared at lower case and "-" and whitespace + are replaced with "_". 
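As a quick illustration of the normalization described just above (a hypothetical check, assuming the patched package that adds deepsparse.legacy.tasks is installed):
```python
# hyphens, whitespace, and capitalization are normalized before comparing
# against the task name and its aliases
from deepsparse.legacy.tasks import AliasedTask

qa_task = AliasedTask("question_answering", ["qa"])

assert qa_task.matches("question_answering")   # exact name
assert qa_task.matches("Question-Answering")   # case and "-" normalized
assert qa_task.matches("question answering")   # whitespace becomes "_"
assert qa_task.matches("QA")                   # alias match
assert not qa_task.matches("text_classification")
```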
+ :return: True if task does match the current instance, False otherwise + """ + task = task.lower().replace("-", "_") + + # replace whitespace with "_" + task = "_".join(task.split()) + + return task == self.name or task in self.aliases + + +class SupportedTasks: + """ + The supported tasks in the DeepSparse pipeline and system + """ + + nlp = namedtuple( + "nlp", + [ + "question_answering", + "text_classification", + "token_classification", + "zero_shot_text_classification", + "transformers_embedding_extraction", + ], + )( + question_answering=AliasedTask("question_answering", ["qa"]), + text_classification=AliasedTask( + "text_classification", ["glue", "sentiment_analysis"] + ), + token_classification=AliasedTask("token_classification", ["ner"]), + zero_shot_text_classification=AliasedTask("zero_shot_text_classification", []), + transformers_embedding_extraction=AliasedTask( + "transformers_embedding_extraction", [] + ), + ) + + chat = namedtuple("chat", ["chatbot", "chat"])( + chatbot=AliasedTask("chatbot", []), chat=AliasedTask("chat", []) + ) + text_generation = namedtuple( + "text_generation", ["text_generation", "opt", "bloom"] + )( + text_generation=AliasedTask("text_generation", []), + opt=AliasedTask("opt", []), + bloom=AliasedTask("bloom", []), + ) + code_generation = namedtuple("code_generation", ["code_generation", "codegen"])( + code_generation=AliasedTask("code_generation", []), + codegen=AliasedTask("codegen", []), + ) + + image_classification = namedtuple("image_classification", ["image_classification"])( + image_classification=AliasedTask( + "image_classification", + ["image_classification"], + ), + ) + + yolo = namedtuple("yolo", ["yolo"])( + yolo=AliasedTask("yolo", ["yolo"]), + ) + yolov8 = namedtuple("yolov8", ["yolov8"])( + yolov8=AliasedTask("yolov8", ["yolov8"]), + ) + yolact = namedtuple("yolact", ["yolact"])( + yolact=AliasedTask("yolact", ["yolact"]), + ) + + haystack = namedtuple("haystack", ["information_retrieval_haystack"])( + information_retrieval_haystack=AliasedTask( + "information_retrieval_haystack", ["haystack"] + ), + ) + embedding_extraction = namedtuple("embedding_extraction", ["embedding_extraction"])( + embedding_extraction=AliasedTask( + "embedding_extraction", ["embedding_extraction"] + ), + ) + open_pif_paf = namedtuple("open_pif_paf", ["open_pif_paf"])( + open_pif_paf=AliasedTask("open_pif_paf", ["open_pif_paf"]), + ) + + all_task_categories = [ + nlp, + image_classification, + yolo, + yolov8, + yolact, + haystack, + embedding_extraction, + open_pif_paf, + text_generation, + chat, + code_generation, + ] + + @classmethod + def check_register_task( + cls, task: str, extra_tasks: Optional[Iterable[str]] = None + ): + """ + :param task: task name to validate and import dependencies for + :param extra_tasks: valid task names that are not included in supported tasks. + i.e. 
tasks registered to Pipeline at runtime + """ + if task == "custom": + # custom task, register the CustomPipeline + import deepsparse.pipelines.custom_pipeline # noqa: F401 + + elif cls.is_text_generation(task): + # noqa: F401 + import deepsparse.legacy.transformers.pipelines.text_generation + + elif cls.is_chat(task): + import deepsparse.transformers.pipelines.chat # noqa: F401 + + elif cls.is_code_generation(task): + import deepsparse.transformers.pipelines.code_generation # noqa: F401 + + elif cls.is_nlp(task): + # trigger transformers pipelines to register with Pipeline.register + import deepsparse.transformers.pipelines # noqa: F401 + + elif cls.is_image_classification(task): + # trigger image classification pipelines to + # register with Pipeline.register + import deepsparse.legacy.image_classification.pipelines # noqa: F401 + + elif cls.is_yolact(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolact.pipelines # noqa: F401 + + elif cls.is_yolo(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolo.pipelines # noqa: F401 + + elif cls.is_yolov8(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolov8.pipelines # noqa: F401 + + elif cls.is_haystack(task): + # trigger haystack pipeline as well as transformers pipelines to + # register with Pipeline.register + import deepsparse.transformers.haystack # noqa: F401 + + elif cls.is_embedding_extraction(task): + # trigger embedding_extraction pipelines to register with + # Pipeline.register + import deepsparse.pipelines.embedding_extraction # noqa :F401 + + elif cls.is_open_pif_paf(task): + # trigger embedding_extraction pipelines to register with + # Pipeline.register + import deepsparse.open_pif_paf.pipelines # noqa :F401 + + all_tasks = set(cls.task_names() + (list(extra_tasks or []))) + if task not in all_tasks: + raise ValueError( + f"Unknown Pipeline task {task}. 
Currently supported tasks are " + f"{list(all_tasks)}" + ) + + @classmethod + def is_chat(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a chat task + :return: True if it is a chat task, False otherwise + """ + return any(chat_task.matches(task) for chat_task in cls.chat) + + @classmethod + def is_text_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + text_generation_task.matches(task) + for text_generation_task in cls.text_generation + ) + + @classmethod + def is_code_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + code_generation_task.matches(task) + for code_generation_task in cls.code_generation + ) + + @classmethod + def is_nlp(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an nlp task + such as question_answering + :return: True if it is an nlp task, False otherwise + """ + return any([nlp_task.matches(task) for nlp_task in cls.nlp]) + + @classmethod + def is_cv(cls, task: str) -> bool: + return ( + cls.is_yolo(task) + or cls.is_yolov8(task) + or cls.is_yolact(task) + or cls.is_image_classification(task) + or cls.is_open_pif_paf(task) + ) + + @classmethod + def is_image_classification(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + classification task + :return: True if it is an image classification task, False otherwise + """ + return any([ic_task.matches(task) for ic_task in cls.image_classification]) + + @classmethod + def is_yolo(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLO + :return: True if it is an segmentation task using YOLO, False otherwise + """ + return any([yolo_task.matches(task) for yolo_task in cls.yolo]) + + @classmethod + def is_yolov8(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLOv8 + :return: True if it is an segmentation task using YOLOv8, False otherwise + """ + return any([yolov8_task.matches(task) for yolov8_task in cls.yolov8]) + + @classmethod + def is_yolact(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLO + :return: True if it is an segmentation task using YOLO, False otherwise + """ + return any([yolact_task.matches(task) for yolact_task in cls.yolact]) + + @classmethod + def is_haystack(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a haystack task + :return: True if it is a haystack task, False otherwise + """ + return any([haystack_task.matches(task) for haystack_task in cls.haystack]) + + @classmethod + def is_embedding_extraction(cls, task): + """ + :param task: the name of the task to check whether it is an + embedding_extraction task + :return: True if it is an embedding_extraction task, False otherwise + """ + return any( + embedding_extraction_task.matches(task) + for embedding_extraction_task in cls.embedding_extraction + ) + + @classmethod + def is_open_pif_paf(cls, task): + """ + :param task: the name of the task to check whether it is an + embedding_extraction task + 
:return: True if it is an open_pif_paf task, False otherwise + """ + return any( + open_pif_paf_task.matches(task) for open_pif_paf_task in cls.open_pif_paf + ) + + @classmethod + def task_names(cls): + task_names = ["custom"] + for task_category in cls.all_task_categories: + for task in task_category: + unique_aliases = ( + alias for alias in task._aliases if alias != task._name + ) + task_names += (task._name, *unique_aliases) + return task_names + + +def dynamic_import_task(module_or_path: str) -> str: + """ + Dynamically imports `module` with importlib, and returns the `TASK` + attribute on the module (something like `importlib.import_module(module).TASK`). + + Example contents of `module`: + ```python + from deepsparse.pipeline import Pipeline + from deepsparse.transformers.pipelines.question_answering import ( + QuestionAnsweringPipeline, + ) + + TASK = "my_qa_task" + Pipeline.register(TASK)(QuestionAnsweringPipeline) + ``` + + NOTE: this modifies `sys.path`. + + :raises FileNotFoundError: if path does not exist + :raises RuntimeError: if the imported module does not contain `TASK` + :raises RuntimeError: if the module doesn't register the task + :return: The task from the imported module. + """ + parent_dir, module_name = _split_dir_and_name(module_or_path) + if not os.path.exists(os.path.join(parent_dir, module_name + ".py")): + raise FileNotFoundError( + f"Unable to find file for {module_or_path}. " + f"Looked for {module_name}.py under {parent_dir if parent_dir else '.'}" + ) + + # add parent_dir to sys.path so we can import the file as a module + sys.path.append(os.curdir) + if parent_dir: + _LOGGER.info(f"Adding {parent_dir} to sys.path") + sys.path.append(parent_dir) + + # do the import + _LOGGER.info(f"Importing '{module_name}'") + module_or_path = importlib.import_module(module_name) + + if not hasattr(module_or_path, "TASK"): + raise RuntimeError( + "When using --task import:, " + "module must set the `TASK` attribute." + ) + + task = getattr(module_or_path, "TASK") + _LOGGER.info(f"Using task={repr(task)}") + + return task + + +def _split_dir_and_name(module_or_path: str) -> Tuple[str, str]: + """ + Examples: + - `a` -> `("", "a")` + - `a.b` -> `("a", "b")` + - `a.b.c` -> `("a/b", "c")` + + :return: module split into directory & name + """ + if module_or_path.endswith(".py"): + # assume path + split_char = os.sep + module_or_path = module_or_path.replace(".py", "") + else: + # assume module + split_char = "." + *dirs, module_name = module_or_path.split(split_char) + parent_dir = os.sep if dirs == [""] else os.sep.join(dirs) + return parent_dir, module_name diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/__init__.py b/src/deepsparse/legacy/transformers/__init__.py similarity index 100% rename from tests/deepsparse/transformers/pipelines/integration_tests/__init__.py rename to src/deepsparse/legacy/transformers/__init__.py diff --git a/src/deepsparse/v2/__init__.py b/src/deepsparse/legacy/transformers/pipelines/__init__.py similarity index 82% rename from src/deepsparse/v2/__init__.py rename to src/deepsparse/legacy/transformers/pipelines/__init__.py index 5fd33a9503..a1657d1025 100644 --- a/src/deepsparse/v2/__init__.py +++ b/src/deepsparse/legacy/transformers/pipelines/__init__.py @@ -1,5 +1,3 @@ -# flake8: noqa - # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .operators import * -from .pipeline import * -from .routers import * -from .schedulers import * -from .task import * -from .utils import * +# flake8: noqa + +from .text_generation import * diff --git a/src/deepsparse/transformers/pipelines/text_generation.py b/src/deepsparse/legacy/transformers/pipelines/text_generation.py similarity index 87% rename from src/deepsparse/transformers/pipelines/text_generation.py rename to src/deepsparse/legacy/transformers/pipelines/text_generation.py index 20d08a5f3b..cdd429300c 100644 --- a/src/deepsparse/transformers/pipelines/text_generation.py +++ b/src/deepsparse/legacy/transformers/pipelines/text_generation.py @@ -18,29 +18,24 @@ import os import pathlib import warnings -from enum import Enum -from typing import ( - Any, - Callable, - Dict, - Generator, - List, - Optional, - Sequence, - Tuple, - Type, - Union, -) +from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Type, Union import numpy import onnx -from pydantic import BaseModel, Field +from pydantic import BaseModel from transformers import GenerationConfig -from deepsparse import Pipeline -from deepsparse.pipeline import DEEPSPARSE_ENGINE +from deepsparse.legacy import Pipeline +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE from deepsparse.transformers.engines import NLDecoderEngine from deepsparse.transformers.pipelines import TransformersPipeline +from deepsparse.transformers.schemas.text_generation_schemas import ( + FinishReason, + GeneratedText, + GenerationDefaults, + TextGenerationInput, + TextGenerationOutput, +) from deepsparse.transformers.utils import DecoderKVCache from deepsparse.transformers.utils.helpers import ( check_and_return_generation_config, @@ -63,152 +58,6 @@ __all__ = ["TextGenerationPipeline"] -# Based off of https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig # noqa E501 -class GenerationDefaults: - # Parameters that control the length of the output - max_length = None - max_new_tokens = 100 - # Parameters that control the generation strategy used - do_sample = False - # Parameters for manipulation of the model output logits - temperature = 1.0 - top_k = 50 - top_p = 1.0 - repetition_penalty = 1.0 - # Parameters that define the outputs - num_return_sequences = 1 - output_scores = False - - -class FinishReason(Enum): - STOP = "stop" - LENGTH = "length" - TIME = "time" - CALLBACK = "callback" - CAPACITY = "capacity" - MAX_NEW_TOKENS = "max_new_tokens" - - -class TextGenerationInput(BaseModel): - class Config: - arbitrary_types_allowed = True - - sequences: Union[str, List[str]] = Field( - alias="prompt", - description="The input sequences to generate the text from.", - ) - return_input_tokens: bool = Field( - default=False, - description="A flag that indicates whether to return " "the input_tokens. ", - ) - include_prompt_logits: bool = Field( - default=False, - description="A flag that indicates whether to return " - "the logits for the prompt. If set, prompt_logits are " - "`prepended` to the logits for the generated text sequence." 
- "Note: This flag is only applicable when output_scores " - "is `True`.", - ) - fixed_sequences_length: bool = Field( - default=False, - description="A flag that indicates whether to modify " - "(pad or truncate) each input text sequence, so that " - "its tokenized length is equal to `sequence_length` " - "of tokens. Useful, when a batch of predictions needs " - "to have consistent length so one " - "can compute metric in a batched fashion. ", - ) - streaming: bool = Field( - default=False, - description="Whether to stream the results back as they are generated. If " - "True, then the results are returned as a generator object which yields " - "the results as they are generated. If False, then the results are returned " - "as a list after it has completed.", - ) - callback: Optional[Callable[[Any], Union[bool, Any]]] = Field( - default=None, - description="Callable that will be invoked " - "on each generated token. If the callable returns " - "`False`, the generation will stop. Default is `None`.", - ) - stop: Union[None, str, Sequence[str]] = Field( - default=None, - description="A string or a list of strings that will be used as" - " stop tokens. (token generation will stop when any of the stop" - " tokens is generated). Set to `None` to ignore this parameter." - " Default is `None`.", - ) - - presence_penalty: Optional[float] = Field( - default=0.0, - description="Penalty applied for generating new token. Any existing" - " token results in the subtraction of its corresponding logit value." - " Default set to 0.0", - ) - - generation_config: Union[None, str, pathlib.Path, Dict, GenerationConfig] = Field( - default=None, - description="GenerationConfig file consisting of parameters used to control " - "sequences generated for each prompt. The current supported parameters are: " - "max_length, max_new_tokens, num_return_sequences, output_scores, top_p, " - "top_k, repetition_penalty, do_sample, temperature. If None is provided, " - "deepsparse defaults will be used. For all other input types, HuggingFace " - "defaults for GenerationConfig will be used. ", - ) - - generation_kwargs: Optional[Dict] = Field( - default=None, - description="Any arguments to override generation_config arguments. Refer to " - "the generation_config argument for a full list of supported variables.", - ) - - -class GeneratedText(BaseModel): - text: str = Field( - description="The generated sequence for a given prompt. If " - "streaming is enabled, this will be the next generated token." - ) - score: Optional[Any] = Field( - default=None, - description="The score for the generated token or sequence. " - "The scores have the shape [sequence_length, vocab_size]", - ) - finished: bool = Field(description="Whether generation has stopped.") - finished_reason: Optional[str] = Field( - default=None, - description="The reason for generation to stop. " - "Defined by FinishReason. One of stop, length, or time.", - ) - - -# TODO: Pydantic aliases allow assignment but not reference. Still need to update. -class TextGenerationOutput(BaseModel): - created: datetime.datetime = Field(description="Time of inference creation.") - prompts: Union[str, List[str]] = Field( - description="Prompts used for the sequence generation. For multiple input " - "prompts, a list of prompts is returned" - ) - generations: Union[List[GeneratedText], List[List[GeneratedText]]] = Field( - description="For a single prompt, a single list of GeneratedText is returned. 
" - "If multiple prompts are given, a list of GeneratedText is returned for each " - "prompt provided. If streamng is enabled, the next generated token is returned." - "Otherwise, the full generated sequence is returned." - ) - input_tokens: Optional[ - Any - ] = Field( # dictionary mapping "token_ids" and "attention_mask" to numpy arrays - default=None, - description="The output of the tokenizer." - "Dictionary containing token_ids and attention_mask, " - "both mapping to arrays of size " - "[batch_size, sequence_length]", - ) - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - @Pipeline.register( task="text_generation", task_aliases=["opt", "bloom"], diff --git a/src/deepsparse/open_pif_paf/pipelines.py b/src/deepsparse/open_pif_paf/pipelines.py index f3a015ace2..995ca68b33 100644 --- a/src/deepsparse/open_pif_paf/pipelines.py +++ b/src/deepsparse/open_pif_paf/pipelines.py @@ -22,12 +22,12 @@ import cv2 import torch +from deepsparse.legacy.pipeline import Pipeline from deepsparse.open_pif_paf.schemas import ( OpenPifPafFields, OpenPifPafInput, OpenPifPafOutput, ) -from deepsparse.pipeline import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolact.utils import preprocess_array from openpifpaf import decoder, network diff --git a/src/deepsparse/v2/operators/__init__.py b/src/deepsparse/operators/__init__.py similarity index 100% rename from src/deepsparse/v2/operators/__init__.py rename to src/deepsparse/operators/__init__.py diff --git a/src/deepsparse/v2/operators/engine_operator.py b/src/deepsparse/operators/engine_operator.py similarity index 95% rename from src/deepsparse/v2/operators/engine_operator.py rename to src/deepsparse/operators/engine_operator.py index 630de2d5bd..f39c55eab9 100644 --- a/src/deepsparse/v2/operators/engine_operator.py +++ b/src/deepsparse/operators/engine_operator.py @@ -20,16 +20,24 @@ from deepsparse import Context as EngineContext from deepsparse import Engine, MultiModelEngine, Scheduler from deepsparse.benchmark import ORTEngine +from deepsparse.operators import Operator from deepsparse.utils import join_engine_outputs, model_to_path, split_engine_inputs -from deepsparse.v2.operators import Operator DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" +TORCHSCRIPT_ENGINE = "torchscript" SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] -__all__ = ["EngineOperator", "EngineOperatorInputs", "EngineOperatorOutputs"] +__all__ = [ + "DEEPSPARSE_ENGINE", + "ORT_ENGINE", + "TORCHSCRIPT_ENGINE", + "EngineOperator", + "EngineOperatorInputs", + "EngineOperatorOutputs", +] class EngineOperatorInputs(BaseModel): @@ -86,6 +94,7 @@ class EngineOperator(Operator): def __init__( self, model_path: str, + batch_size: int = 1, engine_type: str = DEEPSPARSE_ENGINE, num_cores: int = None, num_streams: int = None, @@ -96,7 +105,7 @@ def __init__( ): self.model_path = model_to_path(model_path) self.engine_context = engine_context - self._batch_size = 1 + self._batch_size = batch_size if self.engine_context is not None: num_cores = num_cores or self.engine_context.num_cores diff --git a/src/deepsparse/v2/operators/operator.py b/src/deepsparse/operators/operator.py similarity index 93% rename from src/deepsparse/v2/operators/operator.py rename to src/deepsparse/operators/operator.py index e775056f8f..3fb9336c5c 100644 --- a/src/deepsparse/v2/operators/operator.py +++ b/src/deepsparse/operators/operator.py @@ -17,8 +17,8 @@ from pydantic import BaseModel -from deepsparse.v2.operators.registry import OperatorRegistry 
-from deepsparse.v2.utils import InferenceState +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.utils import InferenceState __all__ = ["Operator"] @@ -101,8 +101,9 @@ def __call__( return self.output_schema(**run_output) return run_output - @staticmethod + @classmethod def create( + cls, task: str, **kwargs, ) -> "Operator": @@ -112,7 +113,10 @@ def create( implementation :return: operator object initialized for the given task """ - operator_constructor = OperatorRegistry.get_task_constructor(task) + try: + operator_constructor = OperatorRegistry.get_task_constructor(task) + except Exception: + raise return operator_constructor(**kwargs) @abstractmethod diff --git a/src/deepsparse/v2/operators/registry.py b/src/deepsparse/operators/registry.py similarity index 89% rename from src/deepsparse/v2/operators/registry.py rename to src/deepsparse/operators/registry.py index 1b83b20728..484cff75a2 100644 --- a/src/deepsparse/v2/operators/registry.py +++ b/src/deepsparse/operators/registry.py @@ -14,7 +14,7 @@ from typing import Type -from deepsparse.v2.task import SupportedTasks, dynamic_import_task +from deepsparse.tasks import SupportedTasks, dynamic_import_task from sparsezoo.utils.registry import ( RegistryMixin, get_from_registry, @@ -34,7 +34,7 @@ class OperatorRegistry(RegistryMixin): @classmethod def register_value(cls, operator, name): - from deepsparse.v2.operators import Operator + from deepsparse.operators import Operator if not isinstance(name, list): name = [name] @@ -59,7 +59,7 @@ def get_task_constructor(cls, task: str) -> Type["Operator"]: # noqa: F821 :return: The class registered to `task` :raises ValueError: if `task` was not registered via `OperatorRegistry.register` """ - from deepsparse.v2.operators import Operator + from deepsparse.operators import Operator if task.startswith("import:"): # dynamically import the task from a file @@ -72,5 +72,8 @@ def get_task_constructor(cls, task: str) -> Type["Operator"]: # noqa: F821 tasks = registered_names(Operator) # step needed to import relevant files required to load the operator - SupportedTasks.check_register_task(task, tasks) + try: + SupportedTasks.check_register_task(task, tasks) + except Exception: + raise return get_from_registry(Operator, task) diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py index 90f23654ba..671750e23e 100644 --- a/src/deepsparse/pipeline.py +++ b/src/deepsparse/pipeline.py @@ -12,1333 +12,372 @@ # See the License for the specific language governing permissions and # limitations under the License. 
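The registry changes above boil down to a register-by-name / look-up-by-name pattern keyed on normalized task strings. A minimal, self-contained sketch of that pattern (the names here are illustrative; this does not reproduce the sparsezoo RegistryMixin API):
```python
from typing import Callable, Dict, List, Union

_OPERATOR_REGISTRY: Dict[str, type] = {}


def register(names: Union[str, List[str]]) -> Callable[[type], type]:
    """Decorator that records a class under one or more task names."""
    names = [names] if isinstance(names, str) else names

    def _decorator(cls: type) -> type:
        for name in names:
            # normalize the same way task strings are normalized elsewhere
            _OPERATOR_REGISTRY[name.lower().replace("-", "_")] = cls
        return cls

    return _decorator


def get_task_constructor(task: str) -> type:
    """Look up the class registered for `task`, raising if it is unknown."""
    task = task.lower().replace("-", "_")
    if task not in _OPERATOR_REGISTRY:
        raise ValueError(
            f"Unknown task {task}. Registered tasks: {list(_OPERATOR_REGISTRY)}"
        )
    return _OPERATOR_REGISTRY[task]


@register(["image_classification", "ic"])
class ImageClassificationOperator:
    pass


assert get_task_constructor("Image-Classification") is ImageClassificationOperator
```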
-""" -Classes and registry for end to end inference pipelines that wrap an underlying -inference engine and include pre/postprocessing -""" -import os -from abc import ABC, abstractmethod -from concurrent.futures import ThreadPoolExecutor -from functools import partial -from pathlib import Path -from typing import Any, Dict, Generator, List, Optional, Tuple, Type, Union - -import numpy -from pydantic import BaseModel, Field - -from deepsparse import Context, Engine, MultiModelEngine, Scheduler -from deepsparse.base_pipeline import _REGISTERED_PIPELINES, BasePipeline, SupportedTasks -from deepsparse.benchmark import ORTEngine, TorchScriptEngine -from deepsparse.cpu import cpu_details -from deepsparse.loggers.base_logger import BaseLogger -from deepsparse.loggers.constants import MetricCategories, SystemGroups -from deepsparse.utils import ( - InferenceStages, - StagedTimer, - TimerManager, - join_engine_outputs, - split_engine_inputs, +import asyncio +import copy +from typing import Any, Dict, List, Optional, Union + +from deepsparse.operators import EngineOperator, Operator +from deepsparse.routers import Router +from deepsparse.schedulers import ( + ContinuousBatchingScheduler, + OperatorScheduler, + SchedulerGroup, ) +from deepsparse.utils import InferenceState, PipelineState +from deepsparse.utils.helpers import run_func +from deepsparse.utils.subgraph import SubGraph -__all__ = [ - "DEEPSPARSE_ENGINE", - "ORT_ENGINE", - "TORCHSCRIPT_ENGINE", - "SUPPORTED_PIPELINE_ENGINES", - "Pipeline", - "BasePipeline", - "SupportedTasks", - "_REGISTERED_PIPELINES", - "PipelineConfig", - "question_answering_pipeline", - "text_classification_pipeline", - "zero_shot_text_classification_pipeline", - "token_classification_pipeline", - "image_classification_pipeline", - "yolo_pipeline", - "Bucketable", - "BucketingPipeline", - "create_engine", - "TextGeneration", - "CodeGeneration", - "Chat", -] - -DEEPSPARSE_ENGINE = "deepsparse" -ORT_ENGINE = "onnxruntime" -TORCHSCRIPT_ENGINE = "torchscript" - -SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] +__all__ = ["Pipeline"] -class Pipeline(BasePipeline): +class Pipeline(Operator): """ - Generic Pipeline abstract class meant to wrap inference engine objects to include - data pre/post-processing. Inputs and outputs of pipelines should be serialized - as pydantic Models. See the BasePipeline above for additional parameters provided - during inference. - - Pipelines should not be instantiated by their constructors, but rather the - `Pipeline.create()` method. The task name given to `create` will be used to - load the appropriate pipeline. When creating a Pipeline, the pipeline should - inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, - `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. - - Finally, the class definition should be decorated by the `Pipeline.register` - function. This defines the task name and task aliases for the pipeline and - ensures that it will be accessible by `Pipeline.create`. The implemented - `Pipeline` subclass must be imported at runtime to be accessible. 
- - Pipeline lifecycle: - - On instantiation - * `onnx_file_path` <- `setup_onnx_file_path` - * `engine` <- `_initialize_engine` - - - on __call__: - * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` - * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` - * `engine_outputs` <- `engine(pre_processed_inputs)` - * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` + Pipeline accepts a series of operators, schedulers, and a router. Calling a pipeline + will use the router to run through all the defined operators. The operators should + be implemented using the Operator class and each implemented operator should be + responsible for a functional component of the pipelines. The flow of inputs/outputs + between the operators and the steps in the pipeline should be defined by the router, + (based off of the Router class), which dicates the next operator in the pipeline. + Execution of the operators will be handled by the provided schedulers. - Example use of register: - ```python - @Pipeline.register( - task="example_task", - task_aliases=["example_alias_1", "example_alias_2"], - ) - class PipelineImplementation(Pipeline): - # implementation of Pipeline abstract methods here - ``` + :param ops: Operators to run within the pipeline. Can either be a list of operators + or dictionary of operators. + :param router: A Router which dictates the next operator to call. + :param schedulers: A list of schedulers to run operators. + :param pipeline_state: pipeline_state created during pipeline initialization - Example use of pipeline: - ```python - example_pipeline = Pipeline.create( - task="example_task", - model_path="model.onnx", - ) - pipeline_outputs = example_pipeline(pipeline_inputs) - ``` - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. None represents - dynamic batch mode (Pipeline will accept any batch size). Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param num_streams: The max number of requests the model can handle - concurrently. None or 0 implies a scheduler-defined default value; - default None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param context: Optional Context object to use for creating instances of - MultiModelEngine. The Context contains a shared scheduler along with - other runtime information that will be used across instances of the - MultiModelEngine to provide optimal performance when running multiple - models concurrently - :param executor: An optional ThreadPoolExecutor() object, if provided the - pipeline executes inference requests in a non-blocking manner and returns - a Future object, call Future.result() on returned object to get the result. 
- Can also accept an int number of workers, a ThreadPoolExecutor object is - auto-initialized with the specified integer in that case; None represents - synchronous execution - if running in dynamic batch mode a default - ThreadPoolExecutor with default workers equal to the number of available - cores / 2 """ def __init__( self, - model_path: str, - engine_type: str = DEEPSPARSE_ENGINE, - batch_size: Optional[int] = 1, - num_cores: int = None, - num_streams: int = None, - scheduler: Scheduler = None, - input_shapes: List[List[int]] = None, - context: Optional[Context] = None, - executor: Optional[Union[ThreadPoolExecutor, int]] = None, - benchmark: bool = False, - _delay_engine_initialize: bool = False, # internal use only - **kwargs, + ops: Union[Dict[str, Operator], List[Operator]], + router: Router, + schedulers: List[OperatorScheduler], + continuous_batching_scheduler: Optional[ContinuousBatchingScheduler] = None, + pipeline_state: Optional[PipelineState] = None, ): - self._benchmark = benchmark - self._model_path_orig = model_path - self._model_path = model_path - self._engine_type = engine_type - self._batch_size = batch_size - self._timer_manager = TimerManager(enabled=True, multi=benchmark) - self.context = context - super().__init__(**kwargs) - - self.executor, self._num_async_workers = _initialize_executor_and_workers( - batch_size=batch_size, - workers_or_executor=executor, - ) - if self.context is not None: - num_cores = num_cores or self.context.num_cores - if self.context.num_cores != num_cores: - raise ValueError( - f"num_cores mismatch. Expected {self.context.num_cores} " - f"from passed context, but got {num_cores} while " - f"instantiating Pipeline" - ) - - self._engine_args = dict( - batch_size=self._batch_size or 1, # bs=1 for dynamic batch - num_cores=num_cores, - input_shapes=input_shapes, - ) - if engine_type.lower() == DEEPSPARSE_ENGINE: - self._engine_args["scheduler"] = scheduler - self._engine_args["num_streams"] = num_streams + self.ops = ops + self.router = router + self.schedulers = schedulers + self.pipeline_state = pipeline_state + self._continuous_batching_scheduler = continuous_batching_scheduler + self.validate() - self.onnx_file_path = self.setup_onnx_file_path() + self._scheduler_group = SchedulerGroup(self.schedulers) - if _delay_engine_initialize: - self.engine = None + def _run_next( + self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs + ): + if ( + isinstance(self.ops[next_step], EngineOperator) + and self._continuous_batching_scheduler + ): + func = self._continuous_batching_scheduler.submit + inp = self.ops[next_step].input_schema(**inp) else: - self.engine = self._initialize_engine() - self._batch_size = self._batch_size or 1 - - self.log( - identifier=f"{SystemGroups.INFERENCE_DETAILS}/num_cores_total", - value=num_cores, - category=MetricCategories.SYSTEM, + func = self._scheduler_group.submit + + return run_func( + func=func, + operator=self.ops[next_step], + inp=inp, + pipeline_state=self.pipeline_state, + inference_state=inference_state, + **kwargs, ) - def __call__(self, *args, **kwargs) -> BaseModel: - with self.timer_manager.new_timer_context() as timer: - if "engine_inputs" in kwargs: - raise ValueError( - "invalid kwarg engine_inputs. 
engine inputs determined " - f"by {self.__class__.__qualname__}.parse_inputs" - ) - - # ------ PREPROCESSING ------ - timer.start(InferenceStages.PRE_PROCESS) - # parse inputs into input_schema - pipeline_inputs = self.parse_inputs(*args, **kwargs) - self.log( - identifier="pipeline_inputs", - value=pipeline_inputs, - category=MetricCategories.DATA, + async def _run_sub_graphs( + self, + sub_graph_inputs: List[Any], + sub_graphs: List[SubGraph], + loop: Optional[asyncio.AbstractEventLoop] = None, + ) -> List[Any]: + """ + Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is + still running but has completed its current step. Schedules the next step + subgraph step. This is repeated until all subgraphs have finished running and + have reached their end step (stored in the Subgraph.end attribute). + + :param sub_graph_inputs: A list of inputs that should be passed to each + subgraph. Each subgraph is given an element of the list as input to its + first node. + :param sub_graphs: A list of Subgraph objects. Each stores the relevant + execution information for the particular subgraph, such as its current step + in the sub graph, inference state, output, and end step. + + :returns: a list of outputs for all the completed Subgraph objects. Returned + in the same order that the subgraphs were passed to the function. + """ + for i in range(len(sub_graphs)): + sub_graphs[i].output = self._run_next( + sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop ) - if not isinstance(pipeline_inputs, self.input_schema): - raise RuntimeError( - f"Unable to parse {self.__class__} inputs into a " - f"{self.input_schema} object. " - f"Inputs parsed to {type(pipeline_inputs)}" + # Execute all sub graphs until all graphs have been completed. + while any(not x.completed for x in sub_graphs): + for sub_graph in sub_graphs: + if not sub_graph.completed: + # get the result for the completed operator; resolve its output + if isinstance(sub_graph.output, asyncio.Future): + await sub_graph.output + operator_output = sub_graph.output.result() + operator_output = sub_graph.parse_output(operator_output) + + # determine the next step for the particular operator, using + # its previous output and previously stored step + next_step = self.router.next( + sub_graph.step, self.ops, operator_output + ) + # update the step + sub_graph.step = next_step + + # store the output for the next step. If the next step is + # end step, this particular route has completed. Simply + # update the output value + if next_step in sub_graph.end: + sub_graph.output = operator_output + sub_graph.completed = True + else: + sub_graph.output = self._run_next( + inp=operator_output, + inference_state=sub_graph.inf, + next_step=next_step, + loop=loop, + ) + + return [x.output for x in sub_graphs] + + async def run_async(self, *args, inference_state: InferenceState, **kwargs): + """ + Run through the operators using the provided router and scheduler. + The input to a given operator is the output of the previous operator. + + :param inference_state: inference_state for the pipeline. + :param pipeline_state: pipeline_state for the pipeline. The values in the state + are created during pipeline creation and are read-only during inference. 
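A rough usage sketch for this async entry point follows; the task name, model path, input keyword, and the InferenceState setup are illustrative assumptions that mirror the synchronous __call__ path further below rather than a documented calling convention:
```python
import asyncio

from deepsparse import Pipeline
from deepsparse.utils import InferenceState


async def main():
    # task, model_path, and the "images" keyword are placeholders
    pipeline = Pipeline.create(
        task="image_classification", model_path="model.onnx"
    )
    # run_async requires an InferenceState; set one up the same way
    # __call__ does when none is supplied
    state = InferenceState()
    state.create_state({})
    return await pipeline.run_async(images=["sample.jpg"], inference_state=state)


print(asyncio.run(main()))
```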
+ """ + loop = asyncio.get_running_loop() + + next_step = self.router.START_ROUTE + operator_output = None + + while next_step != self.router.END_ROUTE: + # Either a dictionary key or valid index + + if next_step == self.router.SPLIT_ROUTE: + if operator_output is None: + raise ValueError( + f"{self.router.SPLIT_ROUTE} should appear after " + f"{self.ROUTER.START_ROUTE}" + ) + + operator_output = await self._apply_split( + operator_output, inference_state, loop=loop ) - # batch size of the inputs may be `> self._batch_size` at this point - engine_inputs = self.process_inputs(pipeline_inputs) - if isinstance(engine_inputs, tuple): - engine_inputs, context = engine_inputs - else: - context = {} - - timer.stop(InferenceStages.PRE_PROCESS) - self.log( - identifier="engine_inputs", - value=engine_inputs, - category=MetricCategories.DATA, - ) - - # ------ INFERENCE ------ - # split inputs into batches of size `self._batch_size` - timer.start(InferenceStages.ENGINE_FORWARD) - batches, orig_batch_size = self.split_engine_inputs( - engine_inputs, self._batch_size - ) - - # submit split batches to engine threadpool - engine_forward_with_context = partial(self.engine_forward, context=context) - batch_outputs = list( - self.executor.map(engine_forward_with_context, batches) - ) - - # join together the batches of size `self._batch_size` - engine_outputs = self.join_engine_outputs( - batch_outputs, orig_batch_size, **context - ) - timer.stop(InferenceStages.ENGINE_FORWARD) - - self.log( - identifier=f"{SystemGroups.INFERENCE_DETAILS}/input_batch_size_total", - # to get the batch size of the inputs, we need to look - # to multiply the engine batch size (self._batch_size) - # by the number of batches processed by the engine during - # a single inference call - value=len(batch_outputs) * self._batch_size, - category=MetricCategories.SYSTEM, - ) - self.log( - identifier="engine_outputs", - value=engine_outputs, - category=MetricCategories.DATA, - ) - - # ------ POSTPROCESSING ------ - timer.start(InferenceStages.POST_PROCESS) - pipeline_outputs = self.process_engine_outputs(engine_outputs, **context) - if not isinstance(pipeline_outputs, (self.output_schema, Generator)): - raise ValueError( - f"Outputs of {self.__class__} must be instances of " - f"{self.output_schema} found output of type " - f"{type(pipeline_outputs)}" + next_step = self.router.route[self.router.JOIN_ROUTE] + if next_step == self.router.END_ROUTE: + return operator_output + + if next_step == self.router.START_ROUTE: + outputs = run_func( + *args, + func=self._scheduler_group.submit, + operator=self.ops[next_step], + inference_state=inference_state, + pipeline_state=self.pipeline_state, + loop=loop, + **kwargs, ) - timer.stop(InferenceStages.POST_PROCESS) - self.log( - identifier="pipeline_outputs", - value=pipeline_outputs, - category=MetricCategories.DATA, - ) - - self.log_inference_times(timer) - - return pipeline_outputs - - @classmethod - def from_config( - cls, - config: Union["PipelineConfig", str, Path], - context: Optional[Context] = None, - logger: Optional[BaseLogger] = None, - ) -> "Pipeline": - """ - :param config: PipelineConfig object, filepath to a json serialized - PipelineConfig, or raw string of a json serialized PipelineConfig - :param context: Optional Context object to use for creating instances of - MultiModelEngine. 
The Context contains a shared scheduler along with - other runtime information that will be used across instances of the - MultiModelEngine to provide optimal performance when running - multiple models concurrently - :param logger: An optional DeepSparse Logger object for inference - logging. Default is None - :return: loaded Pipeline object from the config - """ - if isinstance(config, Path) or ( - isinstance(config, str) and os.path.exists(config) - ): - if isinstance(config, str): - config = Path(config) - config = PipelineConfig.parse_file(config) - if isinstance(config, str): - config = PipelineConfig.parse_raw(config) - - return cls.create( - task=config.task, - model_path=config.model_path, - engine_type=config.engine_type, - batch_size=config.batch_size, - num_cores=config.num_cores, - scheduler=config.scheduler, - input_shapes=config.input_shapes, - alias=config.alias, - context=context, - logger=logger, - **config.kwargs, - ) + await outputs + operator_output = outputs.result() - @abstractmethod - def setup_onnx_file_path(self) -> str: - """ - Performs any setup to unwrap and process the given `model_path` and other - class properties into an inference ready onnx file to be compiled by the - engine of the pipeline + else: + outputs = self._run_next( + inp=operator_output, + next_step=next_step, + inference_state=inference_state, + loop=loop, + ) + await outputs + operator_output = outputs.result() - :return: file path to the ONNX file for the engine to compile - """ - raise NotImplementedError() + if isinstance(operator_output, tuple): + state_update = operator_output[-1] + operator_output = operator_output[0] - @abstractmethod - def process_inputs( - self, - inputs: BaseModel, - ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: - """ - :param inputs: inputs to the pipeline. Must be the type of the `input_schema` - of this pipeline - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine. 
Can - also include a tuple with engine inputs and special key word arguments - to pass to process_engine_outputs to facilitate information from the raw - inputs to postprocessing that may not be included in the engine inputs - """ - raise NotImplementedError() + next_step = self.router.next(next_step, self.ops, operator_output) + if state_update: + inference_state.update_state(state_update) + return operator_output - @abstractmethod - def process_engine_outputs( + async def _apply_split( self, - engine_outputs: List[numpy.ndarray], - **kwargs, - ) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - raise NotImplementedError() - - @property - def model_path_orig(self) -> str: - """ - :return: value originally passed to the `model_path` argument to initialize - this Pipeline - """ - return self._model_path_orig - - @property - def model_path(self) -> str: - """ - :return: path on local system to the onnx file of this model or directory - containing a model.onnx file along with supporting files - """ - return self._model_path - - @property - def engine_args(self) -> Dict[str, Any]: - """ - :return: arguments besides onnx filepath used to instantiate engine - """ - return self._engine_args - - @property - def engine_type(self) -> str: - """ - :return: type of inference engine used for model forward pass - """ - return self._engine_type - - @property - def timer_manager(self) -> TimerManager: - return self._timer_manager - - @property - def current_timer(self) -> Optional[StagedTimer]: - """ - :return: current timer for the pipeline, if any - """ - timer = self.timer_manager.current - - if timer is None: - timer = self.timer_manager.latest - - return timer - - @property - def benchmark(self) -> bool: - return self._benchmark - - @benchmark.setter - def benchmark(self, value: bool): - self._benchmark = value - self.timer_manager.multi = value - - def to_config(self) -> "PipelineConfig": - """ - :return: PipelineConfig that can be used to reload this object - """ - - if not hasattr(self, "task"): - raise RuntimeError( - f"{self.__class__} instance has no attribute task. Pipeline objects " - "must have a task to be serialized to a config. Pipeline objects " - "must be declared with the Pipeline.register object to be assigned a " - "task" + inp: Any, + inference_state: InferenceState, + loop: Optional[asyncio.AbstractEventLoop] = None, + ): + batches, orig_batch_size = self.expand_inputs(inp, 1) + + # Create a list of SplitRoutes, per batch size 1 + # Each SplitRoute object holds information about the particular path it + # follows. All start at the same step defined by SPLIT_ROUTE and start + # with the same inference_state. 
+ split_graphs = [ + SubGraph( + inf=copy.deepcopy(inference_state), + step=self.router.route[self.router.SPLIT_ROUTE], + end=[self.router.JOIN_ROUTE], ) + for i in range(len(batches)) + ] - # parse any additional properties as kwargs - kwargs = {} - for attr_name, attr in self.__class__.__dict__.items(): - if isinstance(attr, property) and attr_name not in dir(PipelineConfig): - kwargs[attr_name] = getattr(self, attr_name) - - return PipelineConfig( - task=self.task, - model_path=self.model_path_orig, - engine_type=self.engine_type, - batch_size=self._batch_size, - num_cores=self._engine_args.get("num_cores"), - scheduler=self._engine_args.get("scheduler"), - input_shapes=self._engine_args.get("input_shapes"), - alias=self.alias, - kwargs=kwargs, + outputs = await self._run_sub_graphs( + sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop ) + return self.condense_inputs(outputs) - def join_engine_outputs( - self, batch_outputs: List[List[numpy.ndarray]], orig_batch_size: int, **kwargs - ) -> List[numpy.ndarray]: - """ - Joins list of engine outputs together into one list. - This is the opposite of `split_engine_inputs` and is meant to be used in tandem. - - :param batch_outputs: list of engine outputs - :param orig_batch_size: original batch size of the inputs - :return: list of engine outputs joined together - """ - return join_engine_outputs(batch_outputs, orig_batch_size) - - def split_engine_inputs( - self, items: List[numpy.ndarray], batch_size: int - ) -> List[List[numpy.ndarray]]: - """ - Splits each item into numpy arrays with the first dimension == `batch_size`. - This is the opposite of `join_engine_outputs` and is meant to be used in tandem. - - :param items: size of each batch to split into - :param batch_size: size of each batch to enforce - - :return: list of batches, where each batch is a list of numpy arrays - """ - return split_engine_inputs(items, batch_size) - - def engine_forward( - self, - engine_inputs: List[numpy.ndarray], - context: Dict = {}, - ) -> List[numpy.ndarray]: + @classmethod + def create(cls, task: str, **kwargs) -> "Pipeline": """ - :param engine_inputs: list of numpy inputs to Pipeline engine forward - pass - :param context: optional dictionary to be used during engine execution - :return: result of forward pass to Pipeline engine + :param task: Pipeline task + :param kwargs: extra task specific kwargs to be passed to the Pipeline + :return: pipeline object initialized for the given task """ - return self.engine(engine_inputs) + try: + pipeline = Operator.create(task=task, **kwargs) + if not isinstance(pipeline, cls): + raise RuntimeError( + "Pipeline was not created for the given task. 
The " + "provided task should be registered using the OperatorRegistry" + ) + except Exception: + from deepsparse.legacy import Pipeline - def log_inference_times(self, timer: StagedTimer): - """ - logs stage times in the given timer + pipeline = Pipeline.create(task=task, **kwargs) + return pipeline - :param timer: timer to log - """ - for stage, time in timer.times.items(): - self.log( - identifier=f"{SystemGroups.PREDICTION_LATENCY}/{stage}_seconds", - value=time, - category=MetricCategories.SYSTEM, - ) - - def _initialize_engine( + def run( self, - ) -> Union[Engine, MultiModelEngine, ORTEngine, TorchScriptEngine]: - return create_engine( - self.onnx_file_path, self.engine_type, self._engine_args, self.context - ) - - def _properties_dict(self) -> Dict: - return { - "config": self.to_config(), - "engine": self.engine, - } - - def __repr__(self): - """ - :return: Unambiguous representation of the current pipeline + *args, + inference_state: InferenceState, + **kwargs, + ): """ - return "{}({})".format(self.__class__, self._properties_dict()) + Run through the operators using the provided router and scheduler. + The input to a given operator is the output of the previous operator. - def __str__(self): + :param inference_state: inference_state for the pipeline. + :param pipeline_state: pipeline_state for the pipeline. The values in the state + are created during pipeline creation and are read-only during inference. """ - :return: Human readable form of the current pipeline - """ - formatted_props = [ - "\t{}: {}".format(key, val) for key, val in self._properties_dict().items() - ] - - return "{}.{}:\n{}".format( - self.__class__.__module__, - self.__class__.__qualname__, - "\n".join(formatted_props), - ) + next_step = self.router.START_ROUTE + operator_output = None + while next_step != self.router.END_ROUTE: + # Split Grap Execution (i.e multiple subgraphs) + # NOTE: split_route should only appear after the start route node + if next_step == self.router.SPLIT_ROUTE: + if operator_output is None: + raise ValueError( + f"{self.router.SPLIT_ROUTE} should appear after " + f"{self.router.START_ROUTE}" + ) -class PipelineConfig(BaseModel): - """ - Configuration for creating a Pipeline object - - Can be used to create a Pipeline from a config object or file with - Pipeline.from_config(), or used as a building block for other configs - such as for deepsparse.server - """ - - task: str = Field( - description="name of task to create a pipeline for", - ) - model_path: str = Field( - default=None, - description="path on local system or SparseZoo stub to load the model from", - ) - engine_type: str = Field( - default=DEEPSPARSE_ENGINE, - description=( - "inference engine to use. Currently supported values include " - "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" - ), - ) - batch_size: Optional[int] = Field( - default=1, - description=("static batch size to use for inference. Default is 1"), - ) - num_cores: int = Field( - default=None, - description=( - "number of CPU cores to allocate for inference engine. None" - "specifies all available cores. Default is None" - ), - ) - scheduler: Optional[str] = Field( - default="async", - description=( - "(deepsparse only) kind of scheduler to execute with. Defaults to async" - ), - ) - input_shapes: List[List[int]] = Field( - default=None, - description=( - "list of shapes to set ONNX the inputs to. Pass None to use model as-is. 
" - "Default is None" - ), - ) - alias: str = Field( - default=None, - description=( - "optional name to give this pipeline instance, useful when inferencing " - "with multiple models. Default is None" - ), - ) - kwargs: Dict[str, Any] = Field( - default={}, - description=( - "Additional arguments for inference with the model that will be passed " - "into the pipeline as kwargs" - ), - ) + operator_output = asyncio.run( + self._apply_split(operator_output, inference_state) + ) + next_step = self.router.route[self.router.JOIN_ROUTE] + if next_step == self.router.END_ROUTE: + return operator_output + + if next_step == self.router.START_ROUTE: + operator_output = run_func( + *args, + func=self._scheduler_group.submit, + operator=self.ops[next_step], + inference_state=inference_state, + pipeline_state=self.pipeline_state, + **kwargs, + ).result() + + if isinstance(operator_output, tuple): + operator_output, state_update = ( + operator_output[0], + operator_output[-1], + ) + inference_state.update_state(state_update) + + next_step = self.router.next(next_step, self.ops, operator_output) + else: + # Single graph execution + graph = SubGraph( + inf=copy.deepcopy(inference_state), + step=next_step, + end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], + ) -class BucketingPipeline(object): - """ - A Proxy class that adds Bucketing functionality to Pipelines + operator_output = asyncio.run( + self._run_sub_graphs( + sub_graph_inputs=[operator_output], sub_graphs=[graph] + ) + )[0] - :param pipelines: A list of Pipeline objects/buckets that implement - `Bucketable` contract - """ + inference_state = graph.inf + next_step = graph.step - def __init__(self, pipelines: List[Pipeline]): - if not (pipelines and isinstance(pipelines, list)): - raise ValueError( - "Expected a non empty List of pipeline objects but got " f"{pipelines}" - ) - self._pipelines = pipelines - self._pipeline_class = pipelines[0].__class__ - self._validate_pipeline_class() + return operator_output def __call__(self, *args, **kwargs): - bucket, parsed_inputs = self._choose_bucket(*args, **kwargs) - return bucket(parsed_inputs) - - def _choose_bucket(self, *args, **kwargs): - parsed_inputs = self._pipelines[-1].parse_inputs(*args, **kwargs) - bucket = self._pipeline_class.route_input_to_bucket( - input_schema=parsed_inputs, - pipelines=self._pipelines, - ) - return bucket, parsed_inputs - - def __getattr__(self, item): - value = getattr(self._pipelines[0].__class__, item) - - if isinstance(value, property): - return getattr(self._pipelines[0], item) - - raise AttributeError( - f"{item} not found in {self.__class__.__name__}, " - f"and is not a property of {self._pipeline_class.__name__}" - ) - - @property - def input_schema(self) -> Type[BaseModel]: """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return self._pipelines[0].input_schema + Consolidate any provided inference_state or pipeline_state objects and pass + any other operator inputs to run(). 
- @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to + :return: output of the pipeline operators run with the router for the given + input """ - return self._pipelines[0].output_schema - - def _validate_pipeline_class(self): - # validate all pipelines belong to the same class - - if not issubclass(self._pipeline_class, Bucketable): - raise ValueError(f"{self._pipeline_class} is not Bucketable") - - is_valid = all( - isinstance(pipeline, self._pipeline_class) for pipeline in self._pipelines - ) - - if not is_valid: - raise ValueError( - "All Pipeline Buckets must belong to the same Pipeline Class" - ) + if kwargs.get("inference_state"): + inference_state = kwargs.pop("inference_state") + else: + inference_state = InferenceState() + inference_state.create_state({}) + kwargs["inference_state"] = inference_state -class Bucketable(ABC): - """ - A contract, that ensures implementing Pipeline class can create multiple Pipeline - instances and route each input sample to correct instance based off of specific - implementations of abstract methods defined in this contract - """ + return self.run(*args, **kwargs) - @staticmethod - @abstractmethod - def should_bucket(*args, **kwargs) -> bool: + def expand_inputs(self, *args, **kwargs): """ - :returns: True if buckets should be created else False + Generic function to handle expanding values. """ - pass + raise NotImplementedError( + "This function should be implemented for any router with split or join " + "nodes. expand_inputs will be called prior to the split node (stored in " + "the router's SPLIT_ROUTE attribute), expanding outputs for each output " + "such that there is a batch size of one per thread." + ) - @staticmethod - @abstractmethod - def create_pipeline_buckets(*args, **kwargs) -> List[Pipeline]: + def condense_inputs(self, *args, **kwargs): """ - :return: Create and return a list of Pipeline objects - representing different buckets + Generic function to handle condensing values. """ - pass + raise NotImplementedError( + "This function should be implemented for any router with split or join " + "nodes. condense_inputs will be called after the join node (stored in the " + "router's JOIN_ROUTE attribute), condensing outputs from multiple threads." + ) - @staticmethod - @abstractmethod - def route_input_to_bucket( - *args, input_schema: BaseModel, pipelines: List[Pipeline], **kwargs - ) -> Pipeline: + def validate(self): """ - :param input_schema: The schema representing an input to the pipeline - :param pipelines: Different buckets to be used - :return: The correct Pipeline object (or Bucket) to route input to + Validate the compatibility of the router and operators provided. """ - pass - - -def create_engine( - onnx_file_path: str, - engine_type: str, - engine_args: Dict, - context: Optional[Context] = None, -) -> Union[Engine, MultiModelEngine, ORTEngine]: - """ - Create an inference engine for a given ONNX model - - :param onnx_file_path: path to ONNX model file - :param engine_type: type of engine to create.
- :param engine_args: arguments to pass to engine constructor - :param context: context to use for engine - :return: inference engine - """ - engine_type = engine_type.lower() - - if engine_type == DEEPSPARSE_ENGINE: - if context is not None and isinstance(context, Context): - engine_args.pop("num_cores", None) - engine_args.pop("scheduler", None) - engine_args.pop("num_streams", None) - engine_args["context"] = context - return MultiModelEngine( - model=onnx_file_path, - **engine_args, - ) - engine_args.pop("cache_output_bools", None) - return Engine(onnx_file_path, **engine_args) - - if engine_type == ORT_ENGINE: - return ORTEngine(onnx_file_path, **engine_args) - - if engine_type == TORCHSCRIPT_ENGINE: - return TorchScriptEngine(onnx_file_path, **engine_args) - - raise ValueError( - f"Unknown engine_type {engine_type}. Supported values include: " - f"{SUPPORTED_PIPELINE_ENGINES}" - ) - - -def _initialize_executor_and_workers( - batch_size: Optional[int], - workers_or_executor: Optional[Union[int, ThreadPoolExecutor]], -) -> Tuple[Optional[ThreadPoolExecutor], int]: - if isinstance(workers_or_executor, ThreadPoolExecutor): - num_async_workers = workers_or_executor._max_workers # noqa - executor = workers_or_executor - elif isinstance(workers_or_executor, int): - num_async_workers = max(1, workers_or_executor) - executor = ThreadPoolExecutor(max_workers=num_async_workers) - elif batch_size is None and workers_or_executor is None: - # default num workers to num available cores / 2 - num_cpu_cores_avaailable = cpu_details()[0] - num_async_workers = max(1, num_cpu_cores_avaailable // 2) - executor = ThreadPoolExecutor(max_workers=num_async_workers) - elif workers_or_executor is not None: - raise ValueError( - "Expected an int or ThreadPoolExecutor to run in async mode" - f" but got {workers_or_executor} of type {type(workers_or_executor)}" - ) - else: - executor = ThreadPoolExecutor(max_workers=1) - num_async_workers = 1 - - if batch_size is None and executor is None: - raise ValueError( - "Must have an ThreadPoolExecutor for running in dynamic batch mode " - f"but got {None}" - ) - - return executor, num_async_workers - - -def text_generation_pipeline( - *args, model: Optional[str] = None, **kwargs -) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("text_generation", *args, **kwargs) - - -def code_generation_pipeline( - *args, model: Optional[str] = None, **kwargs -) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("code_generation", *args, **kwargs) - - -def chat_pipeline(*args, model: Optional[str] = None, **kwargs) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("chat", *args, **kwargs) - - -def _parse_model_arg(model: Optional[str], **kwargs) -> dict: - if model is not None: - model_path = kwargs.get("model_path") - if model_path is not None: - raise ValueError( - f"Only one of model and model_path may be supplied, found {model} " - f"and {model_path} respectively" - ) - kwargs["model_path"] = model - return kwargs - - -# aliases for top level import -TextGeneration = text_generation_pipeline -CodeGeneration = code_generation_pipeline -Chat = chat_pipeline - - 
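The convenience constructors removed above (text_generation_pipeline, code_generation_pipeline, chat_pipeline and their TextGeneration/CodeGeneration/Chat aliases) remain available from deepsparse.legacy; after this refactor the same flow goes through the new Pipeline.create classmethod added earlier in the diff. A minimal sketch of that flow follows — the model directory and the call kwarg are illustrative placeholders, not values taken from this change:

```python
from deepsparse import Pipeline

# Pipeline.create first tries to build the task through the new OperatorRegistry;
# tasks not registered there fall back to deepsparse.legacy.Pipeline.create
# (see the `create` classmethod earlier in this diff).
pipe = Pipeline.create(
    task="text_generation",
    model_path="text_generation_model_dir/",  # placeholder path
)

# kwargs are forwarded through __call__ -> run(); `prompt` is shown for
# illustration and may differ from the pipeline's actual input schema.
generation = pipe(prompt="def fibonacci(n):")
```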
-def question_answering_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers question_answering pipeline - - example instantiation: - ```python - question_answering = Pipeline.create( - task="question_answering", - model_path="question_answering_model_dir/", - ) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param doc_stride: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap. Currently, only reading the first span - is supported (everything after doc_stride will be truncated). Default - is 128 - :param max_question_len: maximum length of the question after tokenization. - It will be truncated if needed. Default is 64 - :param max_answer_len: maximum length of answer after decoding. Default is 15 - """ - return Pipeline.create("question_answering", *args, **kwargs) - - -def text_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers text classification pipeline - - example instantiation: - ```python - text_classifier = Pipeline.create( - task="text_classification", - model_path="text_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - example batch size 1, single text inputs (ie sentiment analysis): - ```python - sentiment = text_classifier("the food tastes great") - sentiment = text_classifier(["the food tastes great"]) - sentiment = text_classifier([["the food tastes great"]]) - ``` - - example batch size 1, multi text input (ie QQP like tasks): - ```python - prediction = text_classifier([["how is the food?", "what is the food?"]]) - ``` - - example batch size n, single text inputs: - ```python - sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) - sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. 
- Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param return_all_scores: if True, instead of returning the prediction as the - argmax of model class predictions, will return all scores and labels as - a list for each result in the batch. Default is False - """ - return Pipeline.create("text_classification", *args, **kwargs) - - -def sentiment_analysis_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers text classification pipeline - - example instantiation: - ```python - text_classifier = Pipeline.create( - task="text_classification", - model_path="text_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - example batch size 1, single text inputs (ie sentiment analysis): - ```python - sentiment = text_classifier("the food tastes great") - sentiment = text_classifier(["the food tastes great"]) - sentiment = text_classifier([["the food tastes great"]]) - ``` - - example batch size 1, multi text input (ie QQP like tasks): - ```python - prediction = text_classifier([["how is the food?", "what is the food?"]]) - ``` - - example batch size n, single text inputs: - ```python - sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) - sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param return_all_scores: if True, instead of returning the prediction as the - argmax of model class predictions, will return all scores and labels as - a list for each result in the batch. 
Default is False - """ - return Pipeline.create("text_classification", *args, **kwargs) - - -def token_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers token classification pipeline - - example instantiation: - ```python - token_classifier = Pipeline.create( - task="token_classification", - model_path="token_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param aggregation_strategy: how to aggregate tokens in postprocessing. Options - include 'none', 'simple', 'first', 'average', and 'max'. Default is None - :param ignore_labels: list of label names to ignore in output. Default is - ['0'] which ignores the default known class label - """ - return Pipeline.create("token_classification", *args, **kwargs) - - -def image_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - Image classification pipeline for DeepSparse - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional dict, or json file of class names to use for - mapping class ids to class labels. Default is None - """ - return Pipeline.create("image_classification", *args, **kwargs) - - -def yolo_pipeline(*args, **kwargs) -> "Pipeline": - """ - Image Segmentation YOLO pipeline for DeepSparse + router_validation = self.router.validate(self.ops) - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values - include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. 
Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional string identifier, dict, or json file of - class names to use for mapping class ids to class labels. Default is - `coco` - """ - return Pipeline.create("yolo", *args, **kwargs) - - -def haystack_pipeline(*args, **kwargs) -> "Pipeline": - """ - Neural Magic pipeline for running Haystack DocumentSearchPipeline. - Supports selected Haystack Nodes as well as Haystack nodes integrated - with the Neural Magic DeepSparse Engine - - example embedding model instantiation: - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - model_path="masked_language_modeling_model_dir/", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseEmbeddingRetriever", - "retriever_args": { - "extraction_strategy": "reduce_mean" - } - }, - ) - ``` - - example deepsparse biencoder instantiation - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseDensePassageRetriever", - "retriever_args": { - "query_model_path": "./query_model", - "passage_model_path": "./passage_model" - } - }, - ) - ``` - - writing documents: - ```python - haystack_pipeline.write_documents([ - { - "title": "Claude Shannon", - "content": "Claude Elwood Shannon was an American mathematician, " - "electrical engineer, and cryptographer known as a father of " - "information theory. He was a 21-year-old master's degree student at " - "the Massachusetts Institute of Technology (MIT)." - }, - { - "title": "Vincent van Gogh", - "content": "Van Gogh was born into an upper-middle-class family. " - "As a child he was serious, quiet and thoughtful. He began drawing " - "at an early age and as a young man worked as an art dealer." - }, - { - "title": "Stevie Wonder", - "content": "Stevland Hardaway Morris, known professionally as " - "Stevie Wonder, is an American singer and musician, who is " - "credited as a pioneer and influence by musicians across a range " - "of genres." - } - ]) - ``` - - example queries: - ```python - from deepsparse.transformers.haystack import print_pipeline_documents - pipeline_outputs = haystack_pipeline( - queries="who invented information theory", - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - - pipeline_outputs = haystack_pipeline( - queries=[ - "famous artists", - "What is Stevie Wonder's real name?" - ], - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. 
Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param docs: list of documents to be written to document_store. Can also - be written after instantiation with write_documents method. - Default is None - :param config: dictionary or instance of HaystackPipelineConfig. Used to - specify Haystack node arguments - :param retriever_kwargs: keyword arguments to be passed to retriever. If - the retriever is a deepsparse retriever, then these arguments will also - be passed to the TransformersEmbeddingExtractionPipeline of the retriever - """ - return Pipeline.create("information_retrieval_haystack", *args, **kwargs) - - -def embedding_extraction_pipeline(*args, **kwargs) -> "Pipeline": - """ - embedding extraction pipeline for extracting intermediate layer embeddings - from transformer models - - example instantiation: - ```python - embedding_extraction_pipeline = Pipeline.create( - task="embedding_extraction", - model_path="masked_language_modeling_model_dir/", - ) - results = embedding_extraction_pipeline( - [ - "the warriors have won the nba finals" - "the warriors are the greatest basketball team ever" - ] - ) - emb_1, emb_2 = results.embeddings - # (expect emb_1 and emb_2 to have high cosine similiarity) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param emb_extraction_layer: if an int, the transformer layer number from - which the embeddings will be extracted. If a string, the name of last - ONNX node in model to draw embeddings from. If None, leave the model - unchanged. Default is -1 (last transformer layer before prediction head) - :param model_size: size of transformer model (size of hidden layer per token - if the model is cut). Default is 768 - :param extraction_strategy: method of pooling embedding values. Currently - supported values are 'per_token', 'reduce_mean', 'reduce_max' and 'cls_token'. 
- Default is 'per_token' - :param return_numpy: return embeddings a list of numpy arrays, list of lists - of floats otherwise. Default is True - :param context: context for engine. If None, then the engine will be initialized - with 2 streams to make use of parallel inference of labels. Default is None - """ - return Pipeline.create("embedding_extraction", *args, **kwargs) - - -def zero_shot_text_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - Transformers zero shot text classification pipeline. This pipeline allows for - text classification using models which were trained on datasets not originally - meant for this task. - - This class upon construction returns an instance of a child Pipeline which - inherits from ZeroShotTextClassificationPipelineBase. Which type of Pipeline - is returned depends on the value of the passed model_scheme argument. - - example dynamic labels: - ```python - zero_shot_text_classifier = Pipeline.create( - task="zero_shot_text_classification", - model_scheme="mnli", - model_config={"hypothesis_template": "This text is related to {}"}, - model_path="mnli_model_dir/", - ) - - sequence_to_classify = "Who are you voting for in 2020?" - candidate_labels = ["Europe", "public health", "politics"] - zero_shot_text_classifier(sequences=sequence_to_classify, labels=candidate_labels) - >>> ZeroShotTextClassificationOutput( - sequences='Who are you voting for in 2020?', - labels=['politics', 'public health', 'Europe'], - scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) - ``` - - example static labels: - ```python - zero_shot_text_classifier = Pipeline.create( - task="zero_shot_text_classification", - model_scheme="mnli", - model_config={"hypothesis_template": "This text is related to {}"}, - model_path="mnli_model_dir/", - labels=["politics", "Europe", "public health"] - ) - - sequence_to_classify = "Who are you voting for in 2020?" - zero_shot_text_classifier(sequences=sequence_to_classify) - >>> ZeroShotTextClassificationOutput( - sequences='Who are you voting for in 2020?', - labels=['politics', 'public health', 'Europe'], - scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) - ``` - - Note that labels must either be provided during pipeline instantiation via - the constructor, at inference time, but not both. - - Note that if a hypothesis_template is provided at inference time, then it - will override the value provided during model instantiation - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: batch size must divide sequences * labels, regardless of - whether using dynamic or static labels. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. 
- If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is "bert-base-uncased" - :param model_scheme: training scheme used to train the model used for zero shot. - Default is "mnli" - :param model_config: config object specific to the model_scheme of this model - or a dict of config keyword arguments - :param labels: static list of labels to perform text classification with. Can - also be provided at inference time - :param context: context for engine. If None, then the engine will be initialized - with 2 streams to make use of parallel inference of labels - """ - return Pipeline.create("zero_shot_text_classification", *args, **kwargs) + if router_validation is False: + # default error message + op_types = [type(op) for op in self.ops] + raise ValueError(f"Invalid Router: {type(self.router)} for ops: {op_types}") + elif isinstance(router_validation, str): + raise ValueError(f"Invalid Router for operators: {router_validation}") diff --git a/src/deepsparse/pipelines/custom_pipeline.py b/src/deepsparse/pipelines/custom_pipeline.py index 5fe6b014a8..a10ab32ea1 100644 --- a/src/deepsparse/pipelines/custom_pipeline.py +++ b/src/deepsparse/pipelines/custom_pipeline.py @@ -18,7 +18,7 @@ import numpy from pydantic import BaseModel -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils.onnx import model_to_path diff --git a/src/deepsparse/pipelines/embedding_extraction.py b/src/deepsparse/pipelines/embedding_extraction.py index e812b3e9a9..801c242afd 100644 --- a/src/deepsparse/pipelines/embedding_extraction.py +++ b/src/deepsparse/pipelines/embedding_extraction.py @@ -23,7 +23,7 @@ import numpy from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.log import get_main_logger from deepsparse.utils import truncate_onnx_embedding_model diff --git a/src/deepsparse/v2/routers/__init__.py b/src/deepsparse/routers/__init__.py similarity index 100% rename from src/deepsparse/v2/routers/__init__.py rename to src/deepsparse/routers/__init__.py diff --git a/src/deepsparse/v2/routers/router.py b/src/deepsparse/routers/router.py similarity index 97% rename from src/deepsparse/v2/routers/router.py rename to src/deepsparse/routers/router.py index 6740f706f1..08e2fe5aa9 100644 --- a/src/deepsparse/v2/routers/router.py +++ b/src/deepsparse/routers/router.py @@ -17,7 +17,7 @@ from abc import abstractmethod from typing import Any, Dict, List, Optional, Union -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator _LOGGER = logging.getLogger(__name__) @@ -83,7 +83,7 @@ class LinearRouter(Router): def __init__(self, end_route: int, start_route: int = 0): super().__init__(end_route=end_route, start_route=start_route) - _LOGGER.warn("SPLIT and JOIN are not yet supported for the LinearRouter.") + _LOGGER.warning("SPLIT and JOIN are not yet supported for the LinearRouter.") def next( self, past: int, ops: Optional[List[Operator]] = None, inp: Optional[Any] = None diff --git a/src/deepsparse/v2/schedulers/__init__.py b/src/deepsparse/schedulers/__init__.py similarity index 100% rename from src/deepsparse/v2/schedulers/__init__.py rename to src/deepsparse/schedulers/__init__.py 
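The __call__ body added above either reuses an inference_state supplied by the caller or constructs a fresh InferenceState before delegating to run(). A small sketch of both call forms, assuming an already-registered task — the task name, model path, and input kwarg are placeholders rather than values from this diff:

```python
from deepsparse import Pipeline
from deepsparse.utils import InferenceState  # relocated from deepsparse.v2.utils in this refactor

pipe = Pipeline.create(
    task="image_classification",
    model_path="image_classification_model_dir/",  # placeholder path
)

# 1) Default form: __call__ builds an InferenceState internally.
result = pipe(images=["sample.jpg"])  # `images` kwarg is illustrative

# 2) Explicit form, mirroring the __call__ hunk above.
state = InferenceState()
state.create_state({})
result = pipe(images=["sample.jpg"], inference_state=state)
```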
diff --git a/src/deepsparse/v2/schedulers/continuous_batching_scheduler.py b/src/deepsparse/schedulers/continuous_batching_scheduler.py similarity index 97% rename from src/deepsparse/v2/schedulers/continuous_batching_scheduler.py rename to src/deepsparse/schedulers/continuous_batching_scheduler.py index cc74ac0996..03bcda019f 100644 --- a/src/deepsparse/v2/schedulers/continuous_batching_scheduler.py +++ b/src/deepsparse/schedulers/continuous_batching_scheduler.py @@ -17,9 +17,9 @@ from threading import Lock from typing import List -from deepsparse.v2.operators import EngineOperator, Operator -from deepsparse.v2.schedulers.scheduler import OperatorScheduler -from deepsparse.v2.schedulers.utils import ( +from deepsparse.operators import EngineOperator, Operator +from deepsparse.schedulers.scheduler import OperatorScheduler +from deepsparse.schedulers.utils import ( ContinuousBatchingExecutorThread, ContinuousBatchingQueues, ) diff --git a/src/deepsparse/v2/schedulers/scheduler.py b/src/deepsparse/schedulers/scheduler.py similarity index 98% rename from src/deepsparse/v2/schedulers/scheduler.py rename to src/deepsparse/schedulers/scheduler.py index 37f2cfce90..6e89d334dc 100644 --- a/src/deepsparse/v2/schedulers/scheduler.py +++ b/src/deepsparse/schedulers/scheduler.py @@ -17,7 +17,7 @@ from concurrent.futures import Future, ThreadPoolExecutor from typing import Callable, Optional -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator __all__ = ["OperatorScheduler"] diff --git a/src/deepsparse/v2/schedulers/scheduler_group.py b/src/deepsparse/schedulers/scheduler_group.py similarity index 94% rename from src/deepsparse/v2/schedulers/scheduler_group.py rename to src/deepsparse/schedulers/scheduler_group.py index 201fcee150..01e590435d 100644 --- a/src/deepsparse/v2/schedulers/scheduler_group.py +++ b/src/deepsparse/schedulers/scheduler_group.py @@ -16,8 +16,8 @@ from concurrent.futures import Future from typing import Any, List -from deepsparse.v2.operators import Operator -from deepsparse.v2.schedulers.scheduler import OperatorScheduler +from deepsparse.operators import Operator +from deepsparse.schedulers.scheduler import OperatorScheduler __all__ = ["SchedulerGroup"] diff --git a/src/deepsparse/v2/schedulers/utils/__init__.py b/src/deepsparse/schedulers/utils/__init__.py similarity index 100% rename from src/deepsparse/v2/schedulers/utils/__init__.py rename to src/deepsparse/schedulers/utils/__init__.py diff --git a/src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py b/src/deepsparse/schedulers/utils/continuous_batching_executor.py similarity index 95% rename from src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py rename to src/deepsparse/schedulers/utils/continuous_batching_executor.py index 40ff00ca4f..7a2fdba123 100644 --- a/src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py +++ b/src/deepsparse/schedulers/utils/continuous_batching_executor.py @@ -16,8 +16,8 @@ from typing import Dict from deepsparse import Engine -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers.utils.continuous_batching_queues import ( +from deepsparse.operators import EngineOperator +from deepsparse.schedulers.utils.continuous_batching_queues import ( ContinuousBatchingQueues, ) diff --git a/src/deepsparse/v2/schedulers/utils/continuous_batching_queues.py b/src/deepsparse/schedulers/utils/continuous_batching_queues.py similarity index 100% rename from 
src/deepsparse/v2/schedulers/utils/continuous_batching_queues.py rename to src/deepsparse/schedulers/utils/continuous_batching_queues.py diff --git a/src/deepsparse/server/cli.py b/src/deepsparse/server/cli.py index c51ba2f972..d92ef1a3bb 100644 --- a/src/deepsparse/server/cli.py +++ b/src/deepsparse/server/cli.py @@ -27,7 +27,8 @@ import click import yaml -from deepsparse.pipeline import SupportedTasks +# TODO: update to use new tasks once server support lands +from deepsparse.legacy.tasks import SupportedTasks from deepsparse.server.config import EndpointConfig, ServerConfig from deepsparse.server.deepsparse_server import DeepsparseServer from deepsparse.server.openai_server import OpenAIServer diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index 8a8f01a0a8..aafb42e59c 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -17,14 +17,14 @@ from pydantic import BaseModel, Field, validator -from deepsparse import DEEPSPARSE_ENGINE, PipelineConfig +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE, PipelineConfig +from deepsparse.legacy.tasks import SupportedTasks from deepsparse.loggers.config import ( MetricFunctionConfig, PipelineSystemLoggingConfig, SystemLoggingConfig, SystemLoggingGroup, ) -from deepsparse.tasks import SupportedTasks __all__ = [ diff --git a/src/deepsparse/server/deepsparse_server.py b/src/deepsparse/server/deepsparse_server.py index da68d64ee9..a6dffe5346 100644 --- a/src/deepsparse/server/deepsparse_server.py +++ b/src/deepsparse/server/deepsparse_server.py @@ -15,7 +15,7 @@ import logging from functools import partial -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.server.config import EndpointConfig from deepsparse.server.server import CheckReady, ModelMetaData, ProxyPipeline, Server from fastapi import FastAPI diff --git a/src/deepsparse/server/openai_server.py b/src/deepsparse/server/openai_server.py index d32dab0d62..c9f4ef2f16 100644 --- a/src/deepsparse/server/openai_server.py +++ b/src/deepsparse/server/openai_server.py @@ -18,7 +18,7 @@ from http import HTTPStatus from typing import AsyncGenerator, Dict, List, Optional -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.server.config import EndpointConfig from deepsparse.server.helpers import create_error_response from deepsparse.server.output import CompletionOutput, RequestOutput diff --git a/src/deepsparse/tasks.py b/src/deepsparse/tasks.py index b220519571..7d0c8cef64 100644 --- a/src/deepsparse/tasks.py +++ b/src/deepsparse/tasks.py @@ -78,30 +78,6 @@ class SupportedTasks: The supported tasks in the DeepSparse pipeline and system """ - nlp = namedtuple( - "nlp", - [ - "question_answering", - "text_classification", - "token_classification", - "zero_shot_text_classification", - "transformers_embedding_extraction", - ], - )( - question_answering=AliasedTask("question_answering", ["qa"]), - text_classification=AliasedTask( - "text_classification", ["glue", "sentiment_analysis"] - ), - token_classification=AliasedTask("token_classification", ["ner"]), - zero_shot_text_classification=AliasedTask("zero_shot_text_classification", []), - transformers_embedding_extraction=AliasedTask( - "transformers_embedding_extraction", [] - ), - ) - - chat = namedtuple("chat", ["chatbot", "chat"])( - chatbot=AliasedTask("chatbot", []), chat=AliasedTask("chat", []) - ) text_generation = namedtuple( "text_generation", ["text_generation", "opt", "bloom"] )( @@ -109,8 +85,12 @@ class 
SupportedTasks: opt=AliasedTask("opt", []), bloom=AliasedTask("bloom", []), ) - code_generation = namedtuple("code_generation", ["code_generation", "codegen"])( + + code_generation = namedtuple( + "code_generation", ["code_generation", "code_gen", "codegen"] + )( code_generation=AliasedTask("code_generation", []), + code_gen=AliasedTask("code_gen", []), codegen=AliasedTask("codegen", []), ) @@ -121,43 +101,7 @@ class SupportedTasks: ), ) - yolo = namedtuple("yolo", ["yolo"])( - yolo=AliasedTask("yolo", ["yolo"]), - ) - yolov8 = namedtuple("yolov8", ["yolov8"])( - yolov8=AliasedTask("yolov8", ["yolov8"]), - ) - yolact = namedtuple("yolact", ["yolact"])( - yolact=AliasedTask("yolact", ["yolact"]), - ) - - haystack = namedtuple("haystack", ["information_retrieval_haystack"])( - information_retrieval_haystack=AliasedTask( - "information_retrieval_haystack", ["haystack"] - ), - ) - embedding_extraction = namedtuple("embedding_extraction", ["embedding_extraction"])( - embedding_extraction=AliasedTask( - "embedding_extraction", ["embedding_extraction"] - ), - ) - open_pif_paf = namedtuple("open_pif_paf", ["open_pif_paf"])( - open_pif_paf=AliasedTask("open_pif_paf", ["open_pif_paf"]), - ) - - all_task_categories = [ - nlp, - image_classification, - yolo, - yolov8, - yolact, - haystack, - embedding_extraction, - open_pif_paf, - text_generation, - chat, - code_generation, - ] + all_task_categories = [text_generation, code_generation, image_classification] @classmethod def check_register_task( @@ -168,54 +112,16 @@ def check_register_task( :param extra_tasks: valid task names that are not included in supported tasks. i.e. tasks registered to Pipeline at runtime """ - if task == "custom": - # custom task, register the CustomPipeline - import deepsparse.pipelines.custom_pipeline # noqa: F401 - - elif cls.is_text_generation(task): + if cls.is_text_generation(task): import deepsparse.transformers.pipelines.text_generation # noqa: F401 - elif cls.is_chat(task): - import deepsparse.transformers.pipelines.chat # noqa: F401 - elif cls.is_code_generation(task): import deepsparse.transformers.pipelines.code_generation # noqa: F401 - elif cls.is_nlp(task): - # trigger transformers pipelines to register with Pipeline.register - import deepsparse.transformers.pipelines # noqa: F401 - elif cls.is_image_classification(task): # trigger image classification pipelines to # register with Pipeline.register - import deepsparse.image_classification.pipelines # noqa: F401 - - elif cls.is_yolact(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolact.pipelines # noqa: F401 - - elif cls.is_yolo(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolo.pipelines # noqa: F401 - - elif cls.is_yolov8(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolov8.pipelines # noqa: F401 - - elif cls.is_haystack(task): - # trigger haystack pipeline as well as transformers pipelines to - # register with Pipeline.register - import deepsparse.transformers.haystack # noqa: F401 - - elif cls.is_embedding_extraction(task): - # trigger embedding_extraction pipelines to register with - # Pipeline.register - import deepsparse.pipelines.embedding_extraction # noqa :F401 - - elif cls.is_open_pif_paf(task): - # trigger embedding_extraction pipelines to register with - # Pipeline.register - import deepsparse.open_pif_paf.pipelines # noqa :F401 + import deepsparse.image_classification.pipeline # noqa: F401 all_tasks = set(cls.task_names() 
+ (list(extra_tasks or []))) if task not in all_tasks: @@ -224,14 +130,6 @@ def check_register_task( f"{list(all_tasks)}" ) - @classmethod - def is_chat(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a chat task - :return: True if it is a chat task, False otherwise - """ - return any(chat_task.matches(task) for chat_task in cls.chat) - @classmethod def is_text_generation(cls, task: str) -> bool: """ @@ -244,37 +142,6 @@ def is_text_generation(cls, task: str) -> bool: for text_generation_task in cls.text_generation ) - @classmethod - def is_code_generation(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a text generation task - such as codegen - :return: True if it is a text generation task, False otherwise - """ - return any( - code_generation_task.matches(task) - for code_generation_task in cls.code_generation - ) - - @classmethod - def is_nlp(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an nlp task - such as question_answering - :return: True if it is an nlp task, False otherwise - """ - return any([nlp_task.matches(task) for nlp_task in cls.nlp]) - - @classmethod - def is_cv(cls, task: str) -> bool: - return ( - cls.is_yolo(task) - or cls.is_yolov8(task) - or cls.is_yolact(task) - or cls.is_image_classification(task) - or cls.is_open_pif_paf(task) - ) - @classmethod def is_image_classification(cls, task: str) -> bool: """ @@ -284,67 +151,9 @@ def is_image_classification(cls, task: str) -> bool: """ return any([ic_task.matches(task) for ic_task in cls.image_classification]) - @classmethod - def is_yolo(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLO - :return: True if it is an segmentation task using YOLO, False otherwise - """ - return any([yolo_task.matches(task) for yolo_task in cls.yolo]) - - @classmethod - def is_yolov8(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLOv8 - :return: True if it is an segmentation task using YOLOv8, False otherwise - """ - return any([yolov8_task.matches(task) for yolov8_task in cls.yolov8]) - - @classmethod - def is_yolact(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLO - :return: True if it is an segmentation task using YOLO, False otherwise - """ - return any([yolact_task.matches(task) for yolact_task in cls.yolact]) - - @classmethod - def is_haystack(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a haystack task - :return: True if it is a haystack task, False otherwise - """ - return any([haystack_task.matches(task) for haystack_task in cls.haystack]) - - @classmethod - def is_embedding_extraction(cls, task): - """ - :param task: the name of the task to check whether it is an - embedding_extraction task - :return: True if it is an embedding_extraction task, False otherwise - """ - return any( - embedding_extraction_task.matches(task) - for embedding_extraction_task in cls.embedding_extraction - ) - - @classmethod - def is_open_pif_paf(cls, task): - """ - :param task: the name of the task to check whether it is an - embedding_extraction task - :return: True if it is an open_pif_paf task, False otherwise - """ - return any( - open_pif_paf_task.matches(task) for open_pif_paf_task in cls.open_pif_paf - ) - @classmethod def task_names(cls): - 
task_names = ["custom"] + task_names = [] for task_category in cls.all_task_categories: for task in task_category: unique_aliases = ( @@ -353,6 +162,18 @@ def task_names(cls): task_names += (task._name, *unique_aliases) return task_names + @classmethod + def is_code_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + code_generation_task.matches(task) + for code_generation_task in cls.code_generation + ) + def dynamic_import_task(module_or_path: str) -> str: """ diff --git a/src/deepsparse/transformers/engines/nl_decoder_engine.py b/src/deepsparse/transformers/engines/nl_decoder_engine.py index 99ab552660..1886b5a013 100644 --- a/src/deepsparse/transformers/engines/nl_decoder_engine.py +++ b/src/deepsparse/transformers/engines/nl_decoder_engine.py @@ -18,7 +18,7 @@ import numpy from deepsparse.engine import Context -from deepsparse.pipeline import DEEPSPARSE_ENGINE, create_engine +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE, create_engine from deepsparse.transformers.utils.decoder_kv_cache import DecoderKVCache from deepsparse.transformers.utils.timings import TextGenerationTimings from deepsparse.utils import TimerManager diff --git a/src/deepsparse/transformers/haystack/pipeline.py b/src/deepsparse/transformers/haystack/pipeline.py index 84aaa74c52..9fc6f28185 100644 --- a/src/deepsparse/transformers/haystack/pipeline.py +++ b/src/deepsparse/transformers/haystack/pipeline.py @@ -38,7 +38,7 @@ from haystack.schema import Document from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.transformers import haystack as DeepSparseHaystack diff --git a/src/deepsparse/transformers/pipelines/chat.py b/src/deepsparse/transformers/pipelines/chat.py index 7f4497f88e..7a24bf070d 100644 --- a/src/deepsparse/transformers/pipelines/chat.py +++ b/src/deepsparse/transformers/pipelines/chat.py @@ -20,12 +20,14 @@ import numpy from pydantic import Field, validator -from deepsparse import Pipeline -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.legacy import Pipeline +from deepsparse.legacy.transformers.pipelines.text_generation import ( + TextGenerationPipeline, +) +from deepsparse.transformers.schemas.text_generation_schemas import ( FinishReason, TextGenerationInput, TextGenerationOutput, - TextGenerationPipeline, ) from deepsparse.transformers.utils import ( DecoderKVCache, diff --git a/src/deepsparse/transformers/pipelines/code_generation.py b/src/deepsparse/transformers/pipelines/code_generation.py index ffbb7387d4..545d1b689e 100644 --- a/src/deepsparse/transformers/pipelines/code_generation.py +++ b/src/deepsparse/transformers/pipelines/code_generation.py @@ -13,17 +13,14 @@ # limitations under the License. 
-from deepsparse import Pipeline +from deepsparse.operators import OperatorRegistry from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline __all__ = ["CodeGenerationPipeline"] -@Pipeline.register( - task="code_generation", - task_aliases=["codegen"], -) +@OperatorRegistry.register(name=["code_generation", "code_gen", "codegen"]) class CodeGenerationPipeline(TextGenerationPipeline): """ Subclass of text generation pipeline to support any defaults or diff --git a/src/deepsparse/transformers/pipelines/embedding_extraction.py b/src/deepsparse/transformers/pipelines/embedding_extraction.py index 1c33f68697..9429a2fc73 100644 --- a/src/deepsparse/transformers/pipelines/embedding_extraction.py +++ b/src/deepsparse/transformers/pipelines/embedding_extraction.py @@ -41,7 +41,7 @@ from pydantic import BaseModel, Field from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.log import get_main_logger from deepsparse.transformers.helpers import truncate_transformer_onnx_model from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/pipeline.py b/src/deepsparse/transformers/pipelines/pipeline.py index 0d54449e56..393d5d449c 100644 --- a/src/deepsparse/transformers/pipelines/pipeline.py +++ b/src/deepsparse/transformers/pipelines/pipeline.py @@ -26,7 +26,7 @@ import transformers from transformers.models.auto import AutoTokenizer -from deepsparse import Bucketable, Pipeline +from deepsparse.legacy import Bucketable, Pipeline from deepsparse.transformers.helpers import ( get_deployment_path, overwrite_transformer_onnx_model_inputs, diff --git a/src/deepsparse/transformers/pipelines/question_answering.py b/src/deepsparse/transformers/pipelines/question_answering.py index 7a60a2ddc8..dfd82e0e82 100644 --- a/src/deepsparse/transformers/pipelines/question_answering.py +++ b/src/deepsparse/transformers/pipelines/question_answering.py @@ -44,7 +44,7 @@ from pydantic import BaseModel, Field from transformers.data import SquadExample -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/text_classification.py b/src/deepsparse/transformers/pipelines/text_classification.py index 1ceea46235..43bc22edd3 100644 --- a/src/deepsparse/transformers/pipelines/text_classification.py +++ b/src/deepsparse/transformers/pipelines/text_classification.py @@ -42,7 +42,7 @@ from pydantic import BaseModel, Field from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/v2/text_generation/__init__.py b/src/deepsparse/transformers/pipelines/text_generation/__init__.py similarity index 100% rename from src/deepsparse/v2/text_generation/__init__.py rename to src/deepsparse/transformers/pipelines/text_generation/__init__.py diff --git a/src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py b/src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py similarity index 97% rename from src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py 
index 17d8dd662c..9fb17f3946 100644 --- a/src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py @@ -15,9 +15,9 @@ import logging from typing import Any +from deepsparse.operators import Operator from deepsparse.transformers.utils.helpers import compute_engine_inputs -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/compile_generated_tokens.py b/src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py similarity index 94% rename from src/deepsparse/v2/text_generation/compile_generated_tokens.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py index 630067f8c3..3cd16e2888 100644 --- a/src/deepsparse/v2/text_generation/compile_generated_tokens.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.utils import InferenceState __all__ = ["CompileGeneratedTokens"] diff --git a/src/deepsparse/v2/text_generation/compile_generations.py b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py similarity index 91% rename from src/deepsparse/v2/text_generation/compile_generations.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_generations.py index ed8297ac01..2187e525a1 100644 --- a/src/deepsparse/v2/text_generation/compile_generations.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py @@ -16,9 +16,9 @@ import numpy from pydantic import BaseModel, Field -from deepsparse.transformers.pipelines.text_generation import FinishReason -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason +from deepsparse.utils import InferenceState __all__ = ["CompileGenerations", "CompileGenerationsOutput"] diff --git a/src/deepsparse/v2/text_generation/compile_logits.py b/src/deepsparse/transformers/pipelines/text_generation/compile_logits.py similarity index 89% rename from src/deepsparse/v2/text_generation/compile_logits.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_logits.py index 48a7158f66..7785880980 100644 --- a/src/deepsparse/v2/text_generation/compile_logits.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_logits.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import ( + NLEngineOutputs, +) +from deepsparse.utils import InferenceState __all__ = ["CompilePromptLogits"] diff --git a/src/deepsparse/v2/text_generation/generate_new_token.py b/src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py similarity index 92% rename from src/deepsparse/v2/text_generation/generate_new_token.py rename to src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py index 5bf48bbdbc..2ff21af54f 100644 --- a/src/deepsparse/v2/text_generation/generate_new_token.py +++ b/src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py @@ -15,10 +15,12 @@ import transformers -from deepsparse.transformers.pipelines.text_generation import FinishReason -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import ( + NLEngineOutputs, +) +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason +from deepsparse.utils import InferenceState __all__ = ["GenerateNewTokenOperator"] diff --git a/src/deepsparse/v2/text_generation/join_output.py b/src/deepsparse/transformers/pipelines/text_generation/join_output.py similarity index 93% rename from src/deepsparse/v2/text_generation/join_output.py rename to src/deepsparse/transformers/pipelines/text_generation/join_output.py index 8a6c77a2f1..b8176c19db 100644 --- a/src/deepsparse/v2/text_generation/join_output.py +++ b/src/deepsparse/transformers/pipelines/text_generation/join_output.py @@ -16,9 +16,11 @@ import numpy +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.compile_generations import ( + CompileGenerationsOutput, +) from deepsparse.transformers.utils.helpers import pad_to_fixed_length -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.compile_generations import CompileGenerationsOutput __all__ = ["JoinOutput"] diff --git a/src/deepsparse/v2/text_generation/kv_cache_operator.py b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py similarity index 98% rename from src/deepsparse/v2/text_generation/kv_cache_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py index 3c15d0ff5a..7dd171c625 100644 --- a/src/deepsparse/v2/text_generation/kv_cache_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py @@ -16,12 +16,12 @@ from pydantic import BaseModel, Field +from deepsparse.operators import Operator from deepsparse.transformers.utils import DecoderKVCache from deepsparse.transformers.utils.helpers import ( initialize_kv_cache_state, prepends_bos_token, ) -from deepsparse.v2.operators import Operator __all__ = ["KVCacheCreator", "KVCacheCreatorInput"] diff --git a/src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py b/src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py similarity index 96% rename from src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py rename to 
src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py index 513c34dfc2..dca4fc3ff9 100644 --- a/src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py @@ -15,9 +15,9 @@ import logging from typing import Any +from deepsparse.operators import Operator from deepsparse.transformers.utils.helpers import compute_engine_inputs -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/nl_engine_operator.py b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py similarity index 98% rename from src/deepsparse/v2/text_generation/nl_engine_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py index d8c80bbaee..d77fbf68df 100644 --- a/src/deepsparse/v2/text_generation/nl_engine_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py @@ -20,16 +20,16 @@ import numpy from pydantic import BaseModel, Field +from deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + EngineOperator, + EngineOperatorInputs, +) from deepsparse.utils import join_engine_outputs, split_engine_inputs from deepsparse.utils.onnx import ( CACHE_INPUT_PREFIX, overwrite_onnx_model_inputs_for_kv_cache_models, ) -from deepsparse.v2.operators.engine_operator import ( - DEEPSPARSE_ENGINE, - EngineOperator, - EngineOperatorInputs, -) __all__ = ["NLEngineOperator", "NLEngineInputs"] @@ -130,7 +130,9 @@ def __init__( self.internal_kv_cache = internal_kv_cache self.model_path = kwargs.get("model_path") (onnx_file_path, additional_outputs) = self.override_model_inputs( - self.model_path, batch_size=1, return_additional_outputs=True + self.model_path, + batch_size=kwargs.get("batch_size", 1), + return_additional_outputs=True, ) output_indices_to_be_cached, kv_cache_data_type, = additional_outputs.get( "output_indices_to_be_cached" diff --git a/src/deepsparse/v2/text_generation/pipeline.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py similarity index 93% rename from src/deepsparse/v2/text_generation/pipeline.py rename to src/deepsparse/transformers/pipelines/text_generation/pipeline.py index 6e27942d19..30cbe99081 100644 --- a/src/deepsparse/v2/text_generation/pipeline.py +++ b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py @@ -13,17 +13,15 @@ # limitations under the License. 
import logging -from typing import Dict, List, Optional +from typing import List, Optional +from deepsparse.operators import EngineOperator +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.pipeline import Pipeline +from deepsparse.routers import GraphRouter +from deepsparse.schedulers import ContinuousBatchingScheduler, OperatorScheduler from deepsparse.transformers.helpers import setup_transformers_pipeline -from deepsparse.transformers.utils.helpers import process_generation_config -from deepsparse.utils import split_engine_inputs -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.operators.registry import OperatorRegistry -from deepsparse.v2.pipeline import Pipeline -from deepsparse.v2.routers import GraphRouter -from deepsparse.v2.schedulers import ContinuousBatchingScheduler, OperatorScheduler -from deepsparse.v2.text_generation import ( +from deepsparse.transformers.pipelines.text_generation import ( AutoRegressiveOperatorPreprocess, CompileGeneratedTokens, CompileGenerations, @@ -39,7 +37,8 @@ ProcessOutputs, TokenGeneratorOperator, ) -from deepsparse.v2.utils import PipelineState +from deepsparse.transformers.utils.helpers import process_generation_config +from deepsparse.utils import PipelineState, split_engine_inputs _LOGGER = logging.getLogger(__name__) @@ -56,7 +55,7 @@ def __init__( force_max_tokens: bool = False, generation_config=None, continuous_batch_sizes: Optional[List[int]] = None, - engine_kwargs: Optional[Dict] = None, + **engine_kwargs, ): ( self.model_path, @@ -145,8 +144,8 @@ def __init__( continuous_batching_scheduler = None if continuous_batch_sizes: if internal_kv_cache: - _LOGGER.warn( - "internal kv_cache is not supported with continuous_batching " + _LOGGER.warning( + "continuous_batching is not supported with internal_kv_cache" ) else: continuous_batching_scheduler = self._get_continuous_batching_scheduler( diff --git a/src/deepsparse/v2/text_generation/prep_for_generation.py b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py similarity index 93% rename from src/deepsparse/v2/text_generation/prep_for_generation.py rename to src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py index 0ea4a06a02..0ac010aedf 100644 --- a/src/deepsparse/v2/text_generation/prep_for_generation.py +++ b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py @@ -16,11 +16,11 @@ import numpy -from deepsparse.transformers.pipelines.text_generation import FinishReason +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation import TokenGeneratorOperator +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason from deepsparse.transformers.utils.helpers import set_generated_length -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation import TokenGeneratorOperator -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState __all__ = ["PrepareGeneration"] diff --git a/src/deepsparse/v2/text_generation/prep_for_prefill.py b/src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py similarity index 96% rename from src/deepsparse/v2/text_generation/prep_for_prefill.py rename to src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py index 2e5fecb3e8..47b4965daf 100644 --- a/src/deepsparse/v2/text_generation/prep_for_prefill.py +++ b/src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py @@ -15,8 
+15,8 @@ import logging from typing import Any -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.operators import Operator +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/process_inputs.py b/src/deepsparse/transformers/pipelines/text_generation/process_inputs.py similarity index 97% rename from src/deepsparse/v2/text_generation/process_inputs.py rename to src/deepsparse/transformers/pipelines/text_generation/process_inputs.py index 0f9147f916..05e93a9cc6 100644 --- a/src/deepsparse/v2/text_generation/process_inputs.py +++ b/src/deepsparse/transformers/pipelines/text_generation/process_inputs.py @@ -17,7 +17,8 @@ import transformers -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import ( GenerationDefaults, TextGenerationInput, ) @@ -26,7 +27,6 @@ override_config, repeat_inputs, ) -from deepsparse.v2.operators import Operator __all__ = ["ProcessInputsTextGeneration"] diff --git a/src/deepsparse/v2/text_generation/process_outputs.py b/src/deepsparse/transformers/pipelines/text_generation/process_outputs.py similarity index 94% rename from src/deepsparse/v2/text_generation/process_outputs.py rename to src/deepsparse/transformers/pipelines/text_generation/process_outputs.py index 7173b8e256..15434175b9 100644 --- a/src/deepsparse/v2/text_generation/process_outputs.py +++ b/src/deepsparse/transformers/pipelines/text_generation/process_outputs.py @@ -16,13 +16,13 @@ import numpy -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import ( FinishReason, GeneratedText, TextGenerationOutput, ) -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState class ProcessOutputs(Operator): diff --git a/src/deepsparse/v2/text_generation/token_generator.py b/src/deepsparse/transformers/pipelines/text_generation/token_generator.py similarity index 96% rename from src/deepsparse/v2/text_generation/token_generator.py rename to src/deepsparse/transformers/pipelines/text_generation/token_generator.py index 9148d71cc8..3f46abd86f 100644 --- a/src/deepsparse/v2/text_generation/token_generator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/token_generator.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from deepsparse.operators import Operator from deepsparse.transformers.utils.token_generator import TokenGenerator -from deepsparse.v2.operators import Operator __all__ = ["TokenGeneratorOperator"] diff --git a/src/deepsparse/transformers/pipelines/token_classification.py b/src/deepsparse/transformers/pipelines/token_classification.py index 66957fce97..4c719ee0c2 100644 --- a/src/deepsparse/transformers/pipelines/token_classification.py +++ b/src/deepsparse/transformers/pipelines/token_classification.py @@ -40,7 +40,7 @@ from transformers.file_utils import ExplicitEnum from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py b/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py index be24d0cd7d..bf0faa5c0f 100644 --- a/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py +++ b/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py @@ -58,7 +58,7 @@ from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index 9243fdf5d2..887af4c594 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -86,7 +86,7 @@ from pydantic import BaseModel from deepsparse import Pipeline -from deepsparse.pipeline import SUPPORTED_PIPELINE_ENGINES +from deepsparse.operators.engine_operator import SUPPORTED_PIPELINE_ENGINES from deepsparse.transformers import fix_numpy_types from deepsparse.transformers.loaders import SUPPORTED_EXTENSIONS, get_batch_loader diff --git a/src/deepsparse/transformers/schemas/__init__.py b/src/deepsparse/transformers/schemas/__init__.py new file mode 100644 index 0000000000..e59f70d938 --- /dev/null +++ b/src/deepsparse/transformers/schemas/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .text_generation_schemas import * diff --git a/src/deepsparse/transformers/schemas/text_generation_schemas.py b/src/deepsparse/transformers/schemas/text_generation_schemas.py new file mode 100644 index 0000000000..7e657f1098 --- /dev/null +++ b/src/deepsparse/transformers/schemas/text_generation_schemas.py @@ -0,0 +1,167 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import pathlib +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Sequence, Union + +from pydantic import BaseModel, Field +from transformers import GenerationConfig + + +# Based off of https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig # noqa E501 +class GenerationDefaults: + # Parameters that control the length of the output + max_length = None + max_new_tokens = 100 + # Parameters that control the generation strategy used + do_sample = False + # Parameters for manipulation of the model output logits + temperature = 1.0 + top_k = 50 + top_p = 1.0 + repetition_penalty = 1.0 + # Parameters that define the outputs + num_return_sequences = 1 + output_scores = False + + +class FinishReason(Enum): + STOP = "stop" + LENGTH = "length" + TIME = "time" + CALLBACK = "callback" + CAPACITY = "capacity" + MAX_NEW_TOKENS = "max_new_tokens" + + +class TextGenerationInput(BaseModel): + class Config: + arbitrary_types_allowed = True + + sequences: Union[str, List[str]] = Field( + alias="prompt", + description="The input sequences to generate the text from.", + ) + return_input_tokens: bool = Field( + default=False, + description="A flag that indicates whether to return " "the input_tokens. ", + ) + include_prompt_logits: bool = Field( + default=False, + description="A flag that indicates whether to return " + "the logits for the prompt. If set, prompt_logits are " + "`prepended` to the logits for the generated text sequence." + "Note: This flag is only applicable when output_scores " + "is `True`.", + ) + fixed_sequences_length: bool = Field( + default=False, + description="A flag that indicates whether to modify " + "(pad or truncate) each input text sequence, so that " + "its tokenized length is equal to `sequence_length` " + "of tokens. Useful, when a batch of predictions needs " + "to have consistent length so one " + "can compute metric in a batched fashion. ", + ) + streaming: bool = Field( + default=False, + description="Whether to stream the results back as they are generated. If " + "True, then the results are returned as a generator object which yields " + "the results as they are generated. If False, then the results are returned " + "as a list after it has completed.", + ) + callback: Optional[Callable[[Any], Union[bool, Any]]] = Field( + default=None, + description="Callable that will be invoked " + "on each generated token. If the callable returns " + "`False`, the generation will stop. Default is `None`.", + ) + stop: Union[None, str, Sequence[str]] = Field( + default=None, + description="A string or a list of strings that will be used as" + " stop tokens. (token generation will stop when any of the stop" + " tokens is generated). Set to `None` to ignore this parameter." + " Default is `None`.", + ) + + presence_penalty: Optional[float] = Field( + default=0.0, + description="Penalty applied for generating new token. Any existing" + " token results in the subtraction of its corresponding logit value." 
+ " Default set to 0.0", + ) + + generation_config: Union[None, str, pathlib.Path, Dict, GenerationConfig] = Field( + default=None, + description="GenerationConfig file consisting of parameters used to control " + "sequences generated for each prompt. The current supported parameters are: " + "max_length, max_new_tokens, num_return_sequences, output_scores, top_p, " + "top_k, repetition_penalty, do_sample, temperature. If None is provided, " + "deepsparse defaults will be used. For all other input types, HuggingFace " + "defaults for GenerationConfig will be used. ", + ) + + generation_kwargs: Optional[Dict] = Field( + default=None, + description="Any arguments to override generation_config arguments. Refer to " + "the generation_config argument for a full list of supported variables.", + ) + + +class GeneratedText(BaseModel): + text: str = Field( + description="The generated sequence for a given prompt. If " + "streaming is enabled, this will be the next generated token." + ) + score: Optional[Any] = Field( + default=None, + description="The score for the generated token or sequence. " + "The scores have the shape [sequence_length, vocab_size]", + ) + finished: bool = Field(description="Whether generation has stopped.") + finished_reason: Optional[str] = Field( + default=None, + description="The reason for generation to stop. " + "Defined by FinishReason. One of stop, length, or time.", + ) + + +# TODO: Pydantic aliases allow assignment but not reference. Still need to update. +class TextGenerationOutput(BaseModel): + created: datetime.datetime = Field(description="Time of inference creation.") + prompts: Union[str, List[str]] = Field( + description="Prompts used for the sequence generation. For multiple input " + "prompts, a list of prompts is returned" + ) + generations: Union[List[GeneratedText], List[List[GeneratedText]]] = Field( + description="For a single prompt, a single list of GeneratedText is returned. " + "If multiple prompts are given, a list of GeneratedText is returned for each " + "prompt provided. If streamng is enabled, the next generated token is returned." + "Otherwise, the full generated sequence is returned." + ) + input_tokens: Optional[ + Any + ] = Field( # dictionary mapping "token_ids" and "attention_mask" to numpy arrays + default=None, + description="The output of the tokenizer." 
+ "Dictionary containing token_ids and attention_mask, " + "both mapping to arrays of size " + "[batch_size, sequence_length]", + ) + + class Config: + arbitrary_types_allowed = True + extra = "allow" diff --git a/src/deepsparse/utils/__init__.py b/src/deepsparse/utils/__init__.py index 8ad6b624da..dafa92b7ed 100644 --- a/src/deepsparse/utils/__init__.py +++ b/src/deepsparse/utils/__init__.py @@ -16,5 +16,9 @@ from .cli_helpers import * from .data import * +from .helpers import * from .onnx import * +from .state import * +from .subgraph import * from .timer import * +from .types import * diff --git a/src/deepsparse/v2/utils/helpers.py b/src/deepsparse/utils/helpers.py similarity index 100% rename from src/deepsparse/v2/utils/helpers.py rename to src/deepsparse/utils/helpers.py diff --git a/src/deepsparse/v2/utils/state.py b/src/deepsparse/utils/state.py similarity index 100% rename from src/deepsparse/v2/utils/state.py rename to src/deepsparse/utils/state.py diff --git a/src/deepsparse/v2/utils/data.py b/src/deepsparse/utils/subgraph.py similarity index 96% rename from src/deepsparse/v2/utils/data.py rename to src/deepsparse/utils/subgraph.py index 9ed340cb7c..d20717dcd7 100644 --- a/src/deepsparse/v2/utils/data.py +++ b/src/deepsparse/utils/subgraph.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from typing import Any, List -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState __all__ = ["SubGraph"] diff --git a/src/deepsparse/v2/utils/types.py b/src/deepsparse/utils/types.py similarity index 100% rename from src/deepsparse/v2/utils/types.py rename to src/deepsparse/utils/types.py diff --git a/src/deepsparse/v2/pipeline.py b/src/deepsparse/v2/pipeline.py deleted file mode 100644 index 40d41c586e..0000000000 --- a/src/deepsparse/v2/pipeline.py +++ /dev/null @@ -1,378 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import asyncio -import copy -from typing import Any, Dict, List, Optional, Union - -from deepsparse.v2.operators import EngineOperator, Operator -from deepsparse.v2.routers import Router -from deepsparse.v2.schedulers import ( - ContinuousBatchingScheduler, - OperatorScheduler, - SchedulerGroup, -) -from deepsparse.v2.utils import InferenceState, PipelineState -from deepsparse.v2.utils.data import SubGraph -from deepsparse.v2.utils.helpers import run_func - - -__all__ = ["Pipeline"] - - -class Pipeline(Operator): - """ - Pipeline accepts a series of operators, schedulers, and a router. Calling a pipeline - will use the router to run through all the defined operators. The operators should - be implemented using the Operator class and each implemented operator should be - responsible for a functional component of the pipelines. The flow of inputs/outputs - between the operators and the steps in the pipeline should be defined by the router, - (based off of the Router class), which dicates the next operator in the pipeline. 
- Execution of the operators will be handled by the provided schedulers. - - :param ops: Operators to run within the pipeline. Can either be a list of operators - or dictionary of operators. - :param router: A Router which dictates the next operator to call. - :param schedulers: A list of schedulers to run operators. - :param pipeline_state: pipeline_state created during pipeline initialization - - """ - - def __init__( - self, - ops: Union[Dict[str, Operator], List[Operator]], - router: Router, - schedulers: List[OperatorScheduler], - continuous_batching_scheduler: Optional[ContinuousBatchingScheduler] = None, - pipeline_state: Optional[PipelineState] = None, - ): - - self.ops = ops - self.router = router - self.schedulers = schedulers - self.pipeline_state = pipeline_state - self._continuous_batching_scheduler = continuous_batching_scheduler - self.validate() - - self._scheduler_group = SchedulerGroup(self.schedulers) - - def _run_next( - self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs - ): - if ( - isinstance(self.ops[next_step], EngineOperator) - and self._continuous_batching_scheduler - ): - func = self._continuous_batching_scheduler.submit - inp = self.ops[next_step].input_schema(**inp) - else: - func = self._scheduler_group.submit - - return run_func( - func=func, - operator=self.ops[next_step], - inp=inp, - pipeline_state=self.pipeline_state, - inference_state=inference_state, - **kwargs, - ) - - async def _run_sub_graphs( - self, - sub_graph_inputs: List[Any], - sub_graphs: List[SubGraph], - loop: Optional[asyncio.AbstractEventLoop] = None, - ) -> List[Any]: - """ - Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is - still running but has completed its current step. Schedules the next step - subgraph step. This is repeated until all subgraphs have finished running and - have reached their end step (stored in the Subgraph.end attribute). - - :param sub_graph_inputs: A list of inputs that should be passed to each - subgraph. Each subgraph is given an element of the list as input to its - first node. - :param sub_graphs: A list of Subgraph objects. Each stores the relevant - execution information for the particular subgraph, such as its current step - in the sub graph, inference state, output, and end step. - - :returns: a list of outputs for all the completed Subgraph objects. Returned - in the same order that the subgraphs were passed to the function. - """ - for i in range(len(sub_graphs)): - sub_graphs[i].output = self._run_next( - sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop - ) - - # Execute all sub graphs until all graphs have been completed. - while any(not x.completed for x in sub_graphs): - for sub_graph in sub_graphs: - if not sub_graph.completed: - # get the result for the completed operator; resolve its output - if isinstance(sub_graph.output, asyncio.Future): - await sub_graph.output - operator_output = sub_graph.output.result() - operator_output = sub_graph.parse_output(operator_output) - - # determine the next step for the particular operator, using - # its previous output and previously stored step - next_step = self.router.next( - sub_graph.step, self.ops, operator_output - ) - # update the step - sub_graph.step = next_step - - # store the output for the next step. If the next step is - # end step, this particular route has completed. 
Simply - # update the output value - if next_step in sub_graph.end: - sub_graph.output = operator_output - sub_graph.completed = True - else: - sub_graph.output = self._run_next( - inp=operator_output, - inference_state=sub_graph.inf, - next_step=next_step, - loop=loop, - ) - - return [x.output for x in sub_graphs] - - async def run_async(self, *args, inference_state: InferenceState, **kwargs): - """ - Run through the operators using the provided router and scheduler. - The input to a given operator is the output of the previous operator. - - :param inference_state: inference_state for the pipeline. - :param pipeline_state: pipeline_state for the pipeline. The values in the state - are created during pipeline creation and are read-only during inference. - """ - loop = asyncio.get_running_loop() - - next_step = self.router.START_ROUTE - operator_output = None - - while next_step != self.router.END_ROUTE: - # Either a dictionary key or valid index - - if next_step == self.router.SPLIT_ROUTE: - if operator_output is None: - raise ValueError( - f"{self.router.SPLIT_ROUTE} should appear after " - f"{self.ROUTER.START_ROUTE}" - ) - - operator_output = await self._apply_split( - operator_output, inference_state, loop=loop - ) - next_step = self.router.route[self.router.JOIN_ROUTE] - if next_step == self.router.END_ROUTE: - return operator_output - - if next_step == self.router.START_ROUTE: - outputs = run_func( - *args, - func=self._scheduler_group.submit, - operator=self.ops[next_step], - inference_state=inference_state, - pipeline_state=self.pipeline_state, - loop=loop, - **kwargs, - ) - await outputs - operator_output = outputs.result() - - else: - outputs = self._run_next( - inp=operator_output, - next_step=next_step, - inference_state=inference_state, - loop=loop, - ) - await outputs - operator_output = outputs.result() - - if isinstance(operator_output, tuple): - state_update = operator_output[-1] - operator_output = operator_output[0] - - next_step = self.router.next(next_step, self.ops, operator_output) - if state_update: - inference_state.update_state(state_update) - return operator_output - - async def _apply_split( - self, - inp: Any, - inference_state: InferenceState, - loop: Optional[asyncio.AbstractEventLoop] = None, - ): - batches, orig_batch_size = self.expand_inputs(inp, 1) - - # Create a list of SplitRoutes, per batch size 1 - # Each SplitRoute object holds information about the particular path it - # follows. All start at the same step defined by SPLIT_ROUTE and start - # with the same inference_state. - split_graphs = [ - SubGraph( - inf=copy.deepcopy(inference_state), - step=self.router.route[self.router.SPLIT_ROUTE], - end=[self.router.JOIN_ROUTE], - ) - for i in range(len(batches)) - ] - - outputs = await self._run_sub_graphs( - sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop - ) - return self.condense_inputs(outputs) - - @staticmethod - def create(task: str, **kwargs) -> "Pipeline": - """ - :param task: Pipeline task - :param kwargs: extra task specific kwargs to be passed to the Pipeline - :return: pipeline object initialized for the given task - """ - pipeline = Operator.create(task=task, **kwargs) - if not isinstance(pipeline, Pipeline): - raise RuntimeError( - "Pipeline was not created for the given task. The " - "provided task should be registered using the OperatorRegistry" - ) - return pipeline - - def run( - self, - *args, - inference_state: InferenceState, - **kwargs, - ): - """ - Run through the operators using the provided router and scheduler. 
- The input to a given operator is the output of the previous operator. - - :param inference_state: inference_state for the pipeline. - :param pipeline_state: pipeline_state for the pipeline. The values in the state - are created during pipeline creation and are read-only during inference. - """ - next_step = self.router.START_ROUTE - operator_output = None - while next_step != self.router.END_ROUTE: - - # Split Grap Execution (i.e multiple subgraphs) - # NOTE: split_route should only appear after the start route node - if next_step == self.router.SPLIT_ROUTE: - if operator_output is None: - raise ValueError( - f"{self.router.SPLIT_ROUTE} should appear after " - f"{self.router.START_ROUTE}" - ) - - operator_output = asyncio.run( - self._apply_split(operator_output, inference_state) - ) - next_step = self.router.route[self.router.JOIN_ROUTE] - if next_step == self.router.END_ROUTE: - return operator_output - - if next_step == self.router.START_ROUTE: - operator_output = run_func( - *args, - func=self._scheduler_group.submit, - operator=self.ops[next_step], - inference_state=inference_state, - pipeline_state=self.pipeline_state, - **kwargs, - ).result() - - if isinstance(operator_output, tuple): - operator_output, state_update = ( - operator_output[0], - operator_output[-1], - ) - inference_state.update_state(state_update) - - next_step = self.router.next(next_step, self.ops, operator_output) - - else: - # Single graph execution - graph = SubGraph( - inf=copy.deepcopy(inference_state), - step=next_step, - end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], - ) - - operator_output = asyncio.run( - self._run_sub_graphs( - sub_graph_inputs=[operator_output], sub_graphs=[graph] - ) - )[0] - - inference_state = graph.inf - next_step = graph.step - - return operator_output - - def __call__(self, *args, **kwargs): - """ - Consolidate any provided inference_state or pipeline_state objects and pass - any other operator inputs to run(). - - :return: output of the pipeline operators ran with the router for the given - input - """ - if kwargs.get("inference_state"): - inference_state = kwargs.pop("inference_state") - else: - inference_state = InferenceState() - inference_state.create_state({}) - - kwargs["inference_state"] = inference_state - - return self.run(*args, **kwargs) - - def expand_inputs(self, *args, **kwargs): - """ - Generic function to handle expanding values. - """ - raise NotImplementedError( - "This function should be implemented for any router with split or join" - "nodes. expand_inputs will be called prior to the split node (stored in " - "the router's SPLIT_ROUTE attribute), expanding outputs for each output " - "such that there is a batch size of one per thread." - ) - - def condense_inputs(self, *args, **kwargs): - """ - Generic function to handle condensing values. - """ - raise NotImplementedError( - "This function should be implemented for any router with split or join " - "nodes. condense_inputs will be called after the join node (stored in the " - "router's JOIN_ROUTE attribute), condensing outputs from multiple threads." - ) - - def validate(self): - """ - Validate that compatability of the router and operators provided. 
- """ - router_validation = self.router.validate(self.ops) - - if router_validation is False: - # default error message - op_types = [type(op) for op in self.ops] - raise ValueError(f"Invalid Router: {type(self.router)} for ops: {op_types}") - elif isinstance(router_validation, str): - raise ValueError(f"Invalid Router for operators: {router_validation}") diff --git a/src/deepsparse/v2/task.py b/src/deepsparse/v2/task.py deleted file mode 100644 index f1f4fc6d66..0000000000 --- a/src/deepsparse/v2/task.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Classes and implementations for supported tasks in the DeepSparse pipeline and system -""" - -import importlib -import logging -import os -import sys -from collections import namedtuple -from typing import Iterable, List, Optional, Tuple - - -_LOGGER = logging.getLogger(__name__) - -__all__ = ["SupportedTasks", "AliasedTask"] - - -class AliasedTask: - """ - A task that can have multiple aliases to match to. - For example, question_answering which can alias to qa as well - - :param name: the name of the task such as question_answering or text_classification - :param aliases: the aliases the task can go by in addition to the name such as - qa, glue, sentiment_analysis, etc - """ - - def __init__(self, name: str, aliases: List[str]): - self._name = name - self._aliases = aliases - - @property - def name(self) -> str: - """ - :return: the name of the task such as question_answering - """ - return self._name - - @property - def aliases(self) -> List[str]: - """ - :return: the aliases the task can go by such as qa, glue, sentiment_analysis - """ - return self._aliases - - def matches(self, task: str) -> bool: - """ - :param task: the name of the task to check whether the given instance matches. - Checks the current name as well as any aliases. - Everything is compared at lower case and "-" and whitespace - are replaced with "_". - :return: True if task does match the current instance, False otherwise - """ - task = task.lower().replace("-", "_") - - # replace whitespace with "_" - task = "_".join(task.split()) - - return task == self.name or task in self.aliases - - -class SupportedTasks: - """ - The supported tasks in the DeepSparse pipeline and system - """ - - text_generation = namedtuple( - "text_generation", ["text_generation", "opt", "bloom"] - )( - text_generation=AliasedTask("text_generation", []), - opt=AliasedTask("opt", []), - bloom=AliasedTask("bloom", []), - ) - - all_task_categories = [text_generation] - - @classmethod - def check_register_task( - cls, task: str, extra_tasks: Optional[Iterable[str]] = None - ): - """ - :param task: task name to validate and import dependencies for - :param extra_tasks: valid task names that are not included in supported tasks. - i.e. 
tasks registered to Pipeline at runtime - """ - if cls.is_text_generation(task): - import deepsparse.v2.text_generation.pipeline # noqa: F401 - - all_tasks = set(cls.task_names() + (list(extra_tasks or []))) - if task not in all_tasks: - raise ValueError( - f"Unknown Pipeline task {task}. Currently supported tasks are " - f"{list(all_tasks)}" - ) - - @classmethod - def is_text_generation(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a text generation task - such as codegen - :return: True if it is a text generation task, False otherwise - """ - return any( - text_generation_task.matches(task) - for text_generation_task in cls.text_generation - ) - - @classmethod - def task_names(cls): - task_names = ["custom"] - for task_category in cls.all_task_categories: - for task in task_category: - unique_aliases = ( - alias for alias in task._aliases if alias != task._name - ) - task_names += (task._name, *unique_aliases) - return task_names - - -def dynamic_import_task(module_or_path: str) -> str: - """ - Dynamically imports `module` with importlib, and returns the `TASK` - attribute on the module (something like `importlib.import_module(module).TASK`). - - Example contents of `module`: - ```python - from deepsparse.pipeline import Pipeline - from deepsparse.transformers.pipelines.question_answering import ( - QuestionAnsweringPipeline, - ) - - TASK = "my_qa_task" - Pipeline.register(TASK)(QuestionAnsweringPipeline) - ``` - - NOTE: this modifies `sys.path`. - - :raises FileNotFoundError: if path does not exist - :raises RuntimeError: if the imported module does not contain `TASK` - :raises RuntimeError: if the module doesn't register the task - :return: The task from the imported module. - """ - parent_dir, module_name = _split_dir_and_name(module_or_path) - if not os.path.exists(os.path.join(parent_dir, module_name + ".py")): - raise FileNotFoundError( - f"Unable to find file for {module_or_path}. " - f"Looked for {module_name}.py under {parent_dir if parent_dir else '.'}" - ) - - # add parent_dir to sys.path so we can import the file as a module - sys.path.append(os.curdir) - if parent_dir: - _LOGGER.info(f"Adding {parent_dir} to sys.path") - sys.path.append(parent_dir) - - # do the import - _LOGGER.info(f"Importing '{module_name}'") - module_or_path = importlib.import_module(module_name) - - if not hasattr(module_or_path, "TASK"): - raise RuntimeError( - "When using --task import:, " - "module must set the `TASK` attribute." - ) - - task = getattr(module_or_path, "TASK") - _LOGGER.info(f"Using task={repr(task)}") - - return task - - -def _split_dir_and_name(module_or_path: str) -> Tuple[str, str]: - """ - Examples: - - `a` -> `("", "a")` - - `a.b` -> `("a", "b")` - - `a.b.c` -> `("a/b", "c")` - - :return: module split into directory & name - """ - if module_or_path.endswith(".py"): - # assume path - split_char = os.sep - module_or_path = module_or_path.replace(".py", "") - else: - # assume module - split_char = "." 
- *dirs, module_name = module_or_path.split(split_char) - parent_dir = os.sep if dirs == [""] else os.sep.join(dirs) - return parent_dir, module_name diff --git a/src/deepsparse/yolact/annotate.py b/src/deepsparse/yolact/annotate.py index 18e7d8c952..7fdc837aa7 100644 --- a/src/deepsparse/yolact/annotate.py +++ b/src/deepsparse/yolact/annotate.py @@ -69,7 +69,7 @@ import click import cv2 -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils.annotate import ( annotate, get_annotations_save_dir, diff --git a/src/deepsparse/yolact/pipelines.py b/src/deepsparse/yolact/pipelines.py index a0e0968dce..99ab3cc876 100644 --- a/src/deepsparse/yolact/pipelines.py +++ b/src/deepsparse/yolact/pipelines.py @@ -18,7 +18,7 @@ import numpy import torch -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolact.schemas import YOLACTInputSchema, YOLACTOutputSchema from deepsparse.yolact.utils import ( diff --git a/src/deepsparse/yolo/pipelines.py b/src/deepsparse/yolo/pipelines.py index 935fc9a1d4..513c62c1fb 100644 --- a/src/deepsparse/yolo/pipelines.py +++ b/src/deepsparse/yolo/pipelines.py @@ -18,7 +18,7 @@ import numpy import onnx -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolo.schemas import YOLOInput, YOLOOutput from deepsparse.yolo.utils import ( diff --git a/src/deepsparse/yolov8/pipelines.py b/src/deepsparse/yolov8/pipelines.py index 4264b5f902..f64fac2cdd 100644 --- a/src/deepsparse/yolov8/pipelines.py +++ b/src/deepsparse/yolov8/pipelines.py @@ -19,7 +19,7 @@ import numpy import torch -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.yolo import YOLOOutput as YOLODetOutput from deepsparse.yolo import YOLOPipeline from deepsparse.yolov8.schemas import YOLOSegOutput diff --git a/tests/deepsparse/evaluation/test_utils.py b/tests/deepsparse/evaluation/test_utils.py index 047799367d..648a521a3d 100644 --- a/tests/deepsparse/evaluation/test_utils.py +++ b/tests/deepsparse/evaluation/test_utils.py @@ -47,12 +47,12 @@ def torch_target(): def test_initialize_model_from_target_pipeline_onnx(pipeline_target): model = text_generation_model_from_target(pipeline_target, "onnxruntime") - assert model.engine_type == "onnxruntime" + assert model.ops.get("single_engine")._engine_type == "onnxruntime" def test_initialize_model_from_target_pipeline_deepsparse(pipeline_target): model = text_generation_model_from_target(pipeline_target, "deepsparse") - assert model.engine_type == "deepsparse" + assert model.ops.get("single_engine")._engine_type == "deepsparse" def test_initialize_model_from_target_torch(torch_target): diff --git a/tests/deepsparse/image_classification/test_pipelines.py b/tests/deepsparse/image_classification/legacy/test_pipelines.py similarity index 98% rename from tests/deepsparse/image_classification/test_pipelines.py rename to tests/deepsparse/image_classification/legacy/test_pipelines.py index 1d74831679..5edf500fd4 100644 --- a/tests/deepsparse/image_classification/test_pipelines.py +++ b/tests/deepsparse/image_classification/legacy/test_pipelines.py @@ -16,11 +16,11 @@ import numpy import pytest -from deepsparse import Pipeline from deepsparse.image_classification.constants import ( IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS, ) +from deepsparse.legacy import Pipeline from sparsezoo import Model from sparsezoo.utils import 
load_numpy_list from tests.utils import mock_engine diff --git a/tests/deepsparse/v2/test_image_classification.py b/tests/deepsparse/image_classification/test_image_classification.py similarity index 88% rename from tests/deepsparse/v2/test_image_classification.py rename to tests/deepsparse/image_classification/test_image_classification.py index c6b04e6f2f..5c0e0761df 100644 --- a/tests/deepsparse/v2/test_image_classification.py +++ b/tests/deepsparse/image_classification/test_image_classification.py @@ -15,10 +15,8 @@ import numpy import pytest -from deepsparse.v2.image_classification import ImageClassificationPipeline -from deepsparse.v2.image_classification.preprocess_operator import ( - ImageClassificationInput, -) +from deepsparse.image_classification import ImageClassificationPipeline +from deepsparse.image_classification.preprocess_operator import ImageClassificationInput from tests.deepsparse.pipelines.data_helpers import computer_vision diff --git a/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py b/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py index ec05003822..8f3017f248 100644 --- a/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py +++ b/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from deepsparse.pipeline import Pipeline +# TODO: update to test the new Pipeline +from deepsparse.legacy.pipeline import Pipeline TASK = "unit_test_task" diff --git a/tests/deepsparse/v2/test_basic_pipeline.py b/tests/deepsparse/pipelines/test_basic_pipeline.py similarity index 88% rename from tests/deepsparse/v2/test_basic_pipeline.py rename to tests/deepsparse/pipelines/test_basic_pipeline.py index bedddd537a..c98ffd538c 100644 --- a/tests/deepsparse/v2/test_basic_pipeline.py +++ b/tests/deepsparse/pipelines/test_basic_pipeline.py @@ -20,10 +20,10 @@ from pydantic import BaseModel -from deepsparse.v2 import Pipeline -from deepsparse.v2.operators import Operator -from deepsparse.v2.routers import LinearRouter -from deepsparse.v2.schedulers import OperatorScheduler +from deepsparse import Pipeline +from deepsparse.operators import Operator +from deepsparse.routers import LinearRouter +from deepsparse.schedulers import OperatorScheduler class IntSchema(BaseModel): diff --git a/tests/deepsparse/pipelines/test_bucketing.py b/tests/deepsparse/pipelines/test_bucketing.py index f3ef345245..f5963658d2 100644 --- a/tests/deepsparse/pipelines/test_bucketing.py +++ b/tests/deepsparse/pipelines/test_bucketing.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from deepsparse import BucketingPipeline, Pipeline +from deepsparse.legacy import BucketingPipeline, Pipeline from tests.utils import mock_engine diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py index b085686186..cb8bfeb97b 100644 --- a/tests/deepsparse/pipelines/test_clip.py +++ b/tests/deepsparse/pipelines/test_clip.py @@ -13,7 +13,6 @@ # limitations under the License. 
import pytest -from deepsparse import BasePipeline, Pipeline from deepsparse.clip import ( CLIPCaptionInput, CLIPCaptionPipeline, @@ -48,6 +47,8 @@ def text_input(): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_visual_clip(engine, visual_input): + from deepsparse import Pipeline + model_path = visual_input[-1] pipeline = Pipeline.create(task="clip_visual", model_path=model_path) assert isinstance(pipeline, CLIPVisualPipeline) @@ -59,6 +60,8 @@ def test_visual_clip(engine, visual_input): @pytest.mark.skip(reason="No CLIP models curently available to run tests") @mock_engine(rng_seed=0) def test_text_clip(engine, text_input): + from deepsparse import Pipeline + model_path = text_input[-1] pipeline = Pipeline.create(task="clip_text", model_path=model_path) assert isinstance(pipeline, CLIPTextPipeline) @@ -70,6 +73,8 @@ def test_text_clip(engine, text_input): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_zero_shot(engine, visual_input, text_input): + from deepsparse.legacy import BasePipeline + model_path_text = text_input[-1] model_path_visual = visual_input[-1] kwargs = { @@ -88,6 +93,8 @@ def test_zero_shot(engine, visual_input, text_input): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_caption(engine, visual_input, text_input): + from deepsparse.legacy import BasePipeline + model_path_visual = text_input[-1] model_path_text = text_input[-1] model_path_decoder = None diff --git a/tests/deepsparse/pipelines/test_custom_pipeline.py b/tests/deepsparse/pipelines/test_custom_pipeline.py index 061b59ae03..34876f99c0 100644 --- a/tests/deepsparse/pipelines/test_custom_pipeline.py +++ b/tests/deepsparse/pipelines/test_custom_pipeline.py @@ -52,6 +52,9 @@ def model_path(): ], ) def test_custom_pipeline_task_names(task_name): + # TODO: update test to be compatible with new pipeline + from deepsparse.legacy.pipeline import Pipeline + cls = Pipeline._get_task_constructor(task_name) assert cls == CustomTaskPipeline diff --git a/tests/deepsparse/pipelines/test_dynamic_import.py b/tests/deepsparse/pipelines/test_dynamic_import.py index 63096e2365..4d5a9333ac 100644 --- a/tests/deepsparse/pipelines/test_dynamic_import.py +++ b/tests/deepsparse/pipelines/test_dynamic_import.py @@ -15,8 +15,10 @@ import os import pytest -from deepsparse.pipeline import _REGISTERED_PIPELINES, Pipeline -from deepsparse.tasks import _split_dir_and_name, dynamic_import_task + +# TODO: update to test the new Pipeline +from deepsparse.legacy.pipeline import _REGISTERED_PIPELINES, Pipeline +from deepsparse.legacy.tasks import _split_dir_and_name, dynamic_import_task def test_split_dir_and_name_module(): diff --git a/tests/deepsparse/pipelines/test_pipeline.py b/tests/deepsparse/pipelines/test_pipeline.py index 945959c679..6ad1c71fe4 100644 --- a/tests/deepsparse/pipelines/test_pipeline.py +++ b/tests/deepsparse/pipelines/test_pipeline.py @@ -18,8 +18,10 @@ import flaky import pytest -from deepsparse.base_pipeline import BasePipeline -from deepsparse.pipeline import ( +from deepsparse.legacy.base_pipeline import BasePipeline + +# TODO: update to test the new pipeline +from deepsparse.legacy.pipeline import ( Pipeline, PipelineConfig, _initialize_executor_and_workers, diff --git a/tests/deepsparse/v2/__init__.py b/tests/deepsparse/schedulers/__init__.py similarity index 100% rename from tests/deepsparse/v2/__init__.py rename to 
tests/deepsparse/schedulers/__init__.py diff --git a/tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py b/tests/deepsparse/schedulers/test_continuous_batching_scheduler.py similarity index 94% rename from tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py rename to tests/deepsparse/schedulers/test_continuous_batching_scheduler.py index 85cac323e0..6d56d71eff 100644 --- a/tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py +++ b/tests/deepsparse/schedulers/test_continuous_batching_scheduler.py @@ -16,8 +16,8 @@ import numpy -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers import ContinuousBatchingScheduler +from deepsparse.operators import EngineOperator +from deepsparse.schedulers import ContinuousBatchingScheduler def test_continuous_batching_executor_thread(): diff --git a/tests/deepsparse/v2/integration_tests/__init__.py b/tests/deepsparse/schedulers/utils/__init__.py similarity index 100% rename from tests/deepsparse/v2/integration_tests/__init__.py rename to tests/deepsparse/schedulers/utils/__init__.py diff --git a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py b/tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py similarity index 96% rename from tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py rename to tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py index 2b7c5a5e68..6389a321d4 100644 --- a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py +++ b/tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py @@ -17,8 +17,8 @@ import numpy -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers.utils import ( +from deepsparse.operators import EngineOperator +from deepsparse.schedulers.utils import ( ContinuousBatchingExecutorThread, ContinuousBatchingQueues, ) diff --git a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py b/tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py similarity index 99% rename from tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py rename to tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py index 1713d54f82..2ef78ccbd2 100644 --- a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py +++ b/tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py @@ -16,7 +16,7 @@ from threading import Thread import pytest -from deepsparse.v2.schedulers.utils import ( +from deepsparse.schedulers.utils import ( ContinuousBatchingQueue, ContinuousBatchingQueues, QueueEntry, diff --git a/tests/deepsparse/v2/schedulers/__init__.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/__init__.py similarity index 100% rename from tests/deepsparse/v2/schedulers/__init__.py rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/__init__.py diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/codegen.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/codegen.yaml similarity index 100% rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/codegen.yaml rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/codegen.yaml diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/gpt_neo.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/gpt_neo.yaml 
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/gpt_neo.yaml
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/gpt_neo.yaml
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/opt.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/opt.yaml
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/opt.yaml
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/opt.yaml
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/helpers.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/helpers.py
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/helpers.py
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/helpers.py
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
similarity index 96%
rename from tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
index 33dca47bfa..eb02b91ba9 100644
--- a/tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py
+++ b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
@@ -42,10 +42,13 @@
 import numpy
 import pytest
-from deepsparse import Pipeline
-from deepsparse.transformers.pipelines.text_generation import TextGenerationOutput
+
+# NOTE: this tests the legacy text generation pipeline. integration tests exist
+# for the new pipeline under v2
+from deepsparse.legacy import Pipeline
+from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
 from sparsezoo import Model
-from tests.deepsparse.transformers.pipelines.integration_tests.helpers import (
+from tests.deepsparse.transformers.pipelines.legacy.integration_tests.helpers import (
     TorchGroundTruthSource,
     parse_params,
     validate_internal_kv_cache,
@@ -53,7 +56,9 @@
 )
-CONFIGS_DIRECTORY = "tests/deepsparse/transformers/pipelines/integration_tests/configs"
+CONFIGS_DIRECTORY = (
+    "tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs"
+)
 @pytest.fixture()
diff --git a/tests/deepsparse/transformers/pipelines/test_text_generation.py b/tests/deepsparse/transformers/pipelines/test_text_generation.py
index ba2a52c40e..4b783ad53d 100644
--- a/tests/deepsparse/transformers/pipelines/test_text_generation.py
+++ b/tests/deepsparse/transformers/pipelines/test_text_generation.py
@@ -17,7 +17,9 @@
 import numpy
 import pytest
-from deepsparse import Pipeline
+
+# TODO: update to use/be compliant with new pipeline
+from deepsparse.legacy.pipeline import Pipeline
 from deepsparse.transformers.utils.helpers import prepends_bos_token
diff --git a/tests/deepsparse/v2/schedulers/utils/__init__.py b/tests/deepsparse/transformers/text_generation/__init__.py
similarity index 100%
rename from tests/deepsparse/v2/schedulers/utils/__init__.py
rename to tests/deepsparse/transformers/text_generation/__init__.py
diff --git a/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py b/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py
new file mode 100644
index 0000000000..0c44f887a4
--- /dev/null
+++ b/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/deepsparse/v2/integration_tests/configs/codegen.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/codegen.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/codegen.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/codegen.yaml
diff --git a/tests/deepsparse/v2/integration_tests/configs/gpt_neo.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/gpt_neo.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/gpt_neo.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/gpt_neo.yaml
diff --git a/tests/deepsparse/v2/integration_tests/configs/opt.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/opt.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/opt.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/opt.yaml
diff --git a/tests/deepsparse/v2/integration_tests/helpers.py b/tests/deepsparse/transformers/text_generation/integration_tests/helpers.py
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/helpers.py
rename to tests/deepsparse/transformers/text_generation/integration_tests/helpers.py
diff --git a/tests/deepsparse/v2/integration_tests/test_llms.py b/tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
similarity index 96%
rename from tests/deepsparse/v2/integration_tests/test_llms.py
rename to tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
index c53899f30c..45ba1135b7 100644
--- a/tests/deepsparse/v2/integration_tests/test_llms.py
+++ b/tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
@@ -39,18 +39,20 @@
 import numpy
 import pytest
-from deepsparse.transformers.pipelines.text_generation import TextGenerationOutput
-from deepsparse.v2.pipeline import Pipeline
-from deepsparse.v2.text_generation import TextGenerationPipeline
+from deepsparse import Pipeline
+from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline
+from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
 from sparsezoo import Model
-from tests.deepsparse.transformers.pipelines.integration_tests.helpers import (
+from tests.deepsparse.transformers.pipelines.legacy.integration_tests.helpers import (
     TorchGroundTruthSource,
     parse_params,
     validate_internal_kv_cache,
 )
-CONFIGS_DIRECTORY = "tests/deepsparse/v2/integration_tests/configs"
+CONFIGS_DIRECTORY = (
+    "tests/deepsparse/transformers/text_generation/integration_tests/configs"
+)
 @pytest.fixture()
@@ -135,7 +137,7 @@ def test_ort_single_token_prefill(self, setup):
         pipeline = self.get_pipeline(
             prompt_sequence_length=1,
-            engine_kwargs={"engine_type": "onnxruntime"},
+            engine_type="onnxruntime",
         )
         output = pipeline(
             prompt=self.prompt,
@@ -163,7 +165,7 @@ def test_ort_multi_token_prefill(self, setup):
             "Cannot run ORT pipeline with the internal deepsparse cache enabled."
         )
         pipeline = self.get_pipeline(
-            engine_kwargs={"engine_type": "onnxruntime"},
+            engine_type="onnxruntime",
         )
         output = pipeline(
             prompt=self.prompt,
@@ -244,7 +246,7 @@ def test_inference_no_kv_cache_ort(self, setup):
     def _test_inference_no_kv_cache(self, engine_type):
         model_path_no_cache = self._get_model_path_no_cache()
         pipeline = self.get_pipeline(
-            model_path=model_path_no_cache, engine_kwargs={"engine_type": engine_type}
+            model_path=model_path_no_cache, engine_type=engine_type
         )
         assert not pipeline.cache_support_enabled, (
             "This pipeline test inference using non-kv cache "
diff --git a/tests/deepsparse/v2/unit/text_generation/conftest.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
similarity index 96%
rename from tests/deepsparse/v2/unit/text_generation/conftest.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
index 3840a9bb0a..7f0251a4d7 100644
--- a/tests/deepsparse/v2/unit/text_generation/conftest.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
@@ -20,13 +20,16 @@
 import pytest
 from deepsparse.transformers.helpers import get_deployment_path
 from deepsparse.transformers.pipelines.text_generation import (
+    NLEngineOperator,
+    TokenGeneratorOperator,
+)
+from deepsparse.transformers.schemas.text_generation_schemas import (
     GenerationDefaults,
     TextGenerationInput,
 )
 from deepsparse.transformers.utils import DecoderKVCache
 from deepsparse.transformers.utils.helpers import initialize_kv_cache_state
-from deepsparse.v2 import InferenceState, PipelineState
-from deepsparse.v2.text_generation import NLEngineOperator, TokenGeneratorOperator
+from deepsparse.utils import InferenceState, PipelineState
 @pytest.fixture(scope="module")
diff --git a/tests/deepsparse/v2/unit/text_generation/test_kv_cache.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
similarity index 93%
rename from tests/deepsparse/v2/unit/text_generation/test_kv_cache.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
index 0c6e42503a..c855dc2521 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_kv_cache.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import KVCacheCreator, KVCacheCreatorInput
+from deepsparse.transformers.pipelines.text_generation import (
+    KVCacheCreator,
+    KVCacheCreatorInput,
+)
 def test_kv_cache_creation(
diff --git a/tests/deepsparse/v2/unit/text_generation/test_misc.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
similarity index 89%
rename from tests/deepsparse/v2/unit/text_generation/test_misc.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
index f215e2aedb..4db36de7ad 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_misc.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import CompilePromptLogits
-from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs
+from deepsparse.transformers.pipelines.text_generation import CompilePromptLogits
+from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import (
+    NLEngineOutputs,
+)
 def test_compile_logits(mock_logits, mock_inference_state, mock_tokens, mock_kv_cache):
diff --git a/tests/deepsparse/v2/unit/text_generation/test_process_inputs.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
similarity index 90%
rename from tests/deepsparse/v2/unit/text_generation/test_process_inputs.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
index 02f4540c44..4362d7f7d8 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_process_inputs.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.transformers.pipelines.text_generation import GenerationDefaults
-from deepsparse.v2.text_generation import ProcessInputsTextGeneration
+from deepsparse.legacy.transformers.pipelines.text_generation import GenerationDefaults
+from deepsparse.transformers.pipelines.text_generation import (
+    ProcessInputsTextGeneration,
+)
 def test_process_inputs(
diff --git a/tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
similarity index 98%
rename from tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
index 19bb4d1c4a..b902417efc 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
@@ -14,7 +14,7 @@
 import numpy
-from deepsparse.v2.text_generation import (
+from deepsparse.transformers.pipelines.text_generation import (
     AutoRegressiveOperatorPreprocess,
     NLEngineInputs,
 )
diff --git a/tests/deepsparse/v2/unit/text_generation/test_token_generation.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
similarity index 95%
rename from tests/deepsparse/v2/unit/text_generation/test_token_generation.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
index d04f863171..613f1106b3 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_token_generation.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
@@ -13,12 +13,14 @@
 # limitations under the License.
 import numpy
-from deepsparse.v2.text_generation import (
+from deepsparse.transformers.pipelines.text_generation import (
     GenerateNewTokenOperator,
     PrepareGeneration,
     TokenGeneratorOperator,
 )
-from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs
+from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import (
+    NLEngineOutputs,
+)
 def test_prep_for_generation(
diff --git a/tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
similarity index 96%
rename from tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
index d2c822af4c..42dd1b1c97 100644
--- a/tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import MultiEnginePrefill
+from deepsparse.transformers.pipelines.text_generation import MultiEnginePrefill
 def test_mult_engine_preprocess(
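The test moves above all follow the same namespace migration. A minimal sketch of the resulting import surface, assuming the post-refactor package layout; every path is taken from the + lines in the hunks above, so treat it as a summary rather than a definitive API reference:

# operators and schedulers move from deepsparse.v2.* to the top-level package
from deepsparse.operators import EngineOperator
from deepsparse.schedulers import ContinuousBatchingScheduler

# the old top-level pipeline becomes the legacy pathway
from deepsparse.legacy import Pipeline as LegacyPipeline

# the v2 pipeline and text generation operators become the defaults
from deepsparse import Pipeline
from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline
from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
from deepsparse.utils import InferenceState, PipelineState

# note: in the updated tests, the engine is selected with engine_type="onnxruntime"
# directly, replacing the old engine_kwargs={"engine_type": "onnxruntime"} form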