diff --git a/src/deepsparse/__init__.py b/src/deepsparse/__init__.py index 6c7d0f1cac..83fc4d9632 100644 --- a/src/deepsparse/__init__.py +++ b/src/deepsparse/__init__.py @@ -33,7 +33,6 @@ from .engine import * from .tasks import * from .pipeline import * -from .base_pipeline import * from .loggers import * from .version import __version__, is_release from .analytics import deepsparse_analytics as _analytics diff --git a/src/deepsparse/clip/captioning_pipeline.py b/src/deepsparse/clip/captioning_pipeline.py index 4e99484b6f..cc8a082c2a 100644 --- a/src/deepsparse/clip/captioning_pipeline.py +++ b/src/deepsparse/clip/captioning_pipeline.py @@ -27,7 +27,7 @@ import torch import torch.nn.functional as F from deepsparse.clip import CLIPDecoderInput, CLIPTextInput, CLIPVisualInput -from deepsparse.pipeline import BasePipeline, Pipeline +from deepsparse.legacy.pipeline import BasePipeline, Pipeline __all__ = ["CLIPCaptionInput", "CLIPCaptionOutput", "CLIPCaptionPipeline"] diff --git a/src/deepsparse/clip/decoder_pipeline.py b/src/deepsparse/clip/decoder_pipeline.py index 6bc1347012..28388b3a74 100644 --- a/src/deepsparse/clip/decoder_pipeline.py +++ b/src/deepsparse/clip/decoder_pipeline.py @@ -17,7 +17,7 @@ import numpy as np from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.utils import model_to_path diff --git a/src/deepsparse/clip/text_pipeline.py b/src/deepsparse/clip/text_pipeline.py index 1d1dbc22ad..89ed8c0f11 100644 --- a/src/deepsparse/clip/text_pipeline.py +++ b/src/deepsparse/clip/text_pipeline.py @@ -17,7 +17,7 @@ import numpy as np from pydantic import BaseModel, Field -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path from open_clip.tokenizer import tokenize diff --git a/src/deepsparse/clip/visual_pipeline.py b/src/deepsparse/clip/visual_pipeline.py index ac71c6c6e1..b827e2db64 100644 --- a/src/deepsparse/clip/visual_pipeline.py +++ b/src/deepsparse/clip/visual_pipeline.py @@ -22,7 +22,7 @@ from torchvision.transforms import InterpolationMode from deepsparse.clip.constants import CLIP_RGB_MEANS, CLIP_RGB_STDS -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.pipelines.computer_vision import ComputerVisionSchema from deepsparse.utils import model_to_path diff --git a/src/deepsparse/clip/zeroshot_pipeline.py b/src/deepsparse/clip/zeroshot_pipeline.py index 56c0df062e..fec1c7de54 100644 --- a/src/deepsparse/clip/zeroshot_pipeline.py +++ b/src/deepsparse/clip/zeroshot_pipeline.py @@ -19,7 +19,7 @@ from pydantic import BaseModel, Field from deepsparse.clip import CLIPTextInput, CLIPVisualInput -from deepsparse.pipeline import BasePipeline, Pipeline +from deepsparse.legacy.pipeline import BasePipeline, Pipeline from scipy.special import softmax diff --git a/src/deepsparse/evaluation/cli.py b/src/deepsparse/evaluation/cli.py index 674859cace..bd557a3cf7 100644 --- a/src/deepsparse/evaluation/cli.py +++ b/src/deepsparse/evaluation/cli.py @@ -77,7 +77,11 @@ ) from src.deepsparse.evaluation.results import Result, save_result from src.deepsparse.evaluation.utils import args_to_dict, get_save_path -from src.deepsparse.pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE +from src.deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + ORT_ENGINE, + TORCHSCRIPT_ENGINE, +) _LOGGER = logging.getLogger(__name__) diff --git 
a/src/deepsparse/evaluation/evaluator.py b/src/deepsparse/evaluation/evaluator.py index d58fef7714..43948c6c51 100644 --- a/src/deepsparse/evaluation/evaluator.py +++ b/src/deepsparse/evaluation/evaluator.py @@ -19,7 +19,11 @@ ) from src.deepsparse.evaluation.registry import EvaluationRegistry from src.deepsparse.evaluation.results import Result -from src.deepsparse.pipeline import DEEPSPARSE_ENGINE, ORT_ENGINE, TORCHSCRIPT_ENGINE +from src.deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + ORT_ENGINE, + TORCHSCRIPT_ENGINE, +) __all__ = ["evaluate"] diff --git a/src/deepsparse/evaluation/utils.py b/src/deepsparse/evaluation/utils.py index 4685932084..1091b8d4e3 100644 --- a/src/deepsparse/evaluation/utils.py +++ b/src/deepsparse/evaluation/utils.py @@ -17,7 +17,8 @@ from transformers import AutoModelForCausalLM -from deepsparse import DEEPSPARSE_ENGINE, ORT_ENGINE, Pipeline +from deepsparse import Pipeline +from deepsparse.operators.engine_operator import DEEPSPARSE_ENGINE, ORT_ENGINE __all__ = ["text_generation_model_from_target", "get_save_path", "args_to_dict"] diff --git a/src/deepsparse/image_classification/__init__.py b/src/deepsparse/image_classification/__init__.py index ddb21bd1f7..4009e35e41 100644 --- a/src/deepsparse/image_classification/__init__.py +++ b/src/deepsparse/image_classification/__init__.py @@ -25,9 +25,12 @@ "Please install deepsparse[image_classification] to use this pathway" ) - from .constants import * -from .pipelines import * +from .pipeline import * + +# flake8: noqa +from .postprocess_operator import * +from .preprocess_operator import * from .schemas import * diff --git a/src/deepsparse/v2/image_classification/pipeline.py b/src/deepsparse/image_classification/pipeline.py similarity index 73% rename from src/deepsparse/v2/image_classification/pipeline.py rename to src/deepsparse/image_classification/pipeline.py index 3d7887a701..738ff980bf 100644 --- a/src/deepsparse/v2/image_classification/pipeline.py +++ b/src/deepsparse/image_classification/pipeline.py @@ -13,19 +13,19 @@ # limitations under the License. 
import logging -import warnings from typing import Dict, Optional, Tuple, Union -from deepsparse.v2.image_classification.postprocess_operator import ( +from deepsparse.image_classification.postprocess_operator import ( ImageClassificationPostProcess, ) -from deepsparse.v2.image_classification.preprocess_operator import ( +from deepsparse.image_classification.preprocess_operator import ( ImageClassificationPreProcess, ) -from deepsparse.v2.operators.engine_operator import EngineOperator -from deepsparse.v2.pipeline import Pipeline -from deepsparse.v2.routers.router import LinearRouter -from deepsparse.v2.schedulers.scheduler import OperatorScheduler +from deepsparse.operators.engine_operator import EngineOperator +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.pipeline import Pipeline +from deepsparse.routers.router import LinearRouter +from deepsparse.schedulers.scheduler import OperatorScheduler _LOGGER = logging.getLogger(__name__) @@ -33,20 +33,23 @@ __all__ = ["ImageClassificationPipeline"] +@OperatorRegistry.register(name="image_classification") class ImageClassificationPipeline(Pipeline): def __init__( self, model_path: str, - engine_kwargs: Optional[Dict] = None, class_names: Union[None, str, Dict[str, str]] = None, image_size: Optional[Tuple[int]] = None, top_k: int = 1, + **engine_kwargs, ): + if not engine_kwargs: engine_kwargs = {} engine_kwargs["model_path"] = model_path elif engine_kwargs.get("model_path") != model_path: - warnings.warn(f"Updating engine_kwargs to include {model_path}") + _LOGGER.warning(f"Updating engine_kwargs to include {model_path}") + engine_kwargs["model_path"] = model_path engine = EngineOperator(**engine_kwargs) preproces = ImageClassificationPreProcess( diff --git a/src/deepsparse/v2/image_classification/postprocess_operator.py b/src/deepsparse/image_classification/postprocess_operator.py similarity index 98% rename from src/deepsparse/v2/image_classification/postprocess_operator.py rename to src/deepsparse/image_classification/postprocess_operator.py index 9231113368..214c115e70 100644 --- a/src/deepsparse/v2/image_classification/postprocess_operator.py +++ b/src/deepsparse/image_classification/postprocess_operator.py @@ -18,7 +18,7 @@ import numpy from pydantic import BaseModel, Field -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator class ImageClassificationOutput(BaseModel): diff --git a/src/deepsparse/v2/image_classification/preprocess_operator.py b/src/deepsparse/image_classification/preprocess_operator.py similarity index 99% rename from src/deepsparse/v2/image_classification/preprocess_operator.py rename to src/deepsparse/image_classification/preprocess_operator.py index 9b4517a44c..2f26c3afaa 100644 --- a/src/deepsparse/v2/image_classification/preprocess_operator.py +++ b/src/deepsparse/image_classification/preprocess_operator.py @@ -23,8 +23,8 @@ IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS, ) +from deepsparse.operators import Operator from deepsparse.pipelines.computer_vision import ComputerVisionSchema -from deepsparse.v2.operators import Operator class ImageClassificationInput(ComputerVisionSchema): diff --git a/src/deepsparse/image_classification/validation_script.py b/src/deepsparse/image_classification/validation_script.py index 4a0a884084..9cd4e14c30 100644 --- a/src/deepsparse/image_classification/validation_script.py +++ b/src/deepsparse/image_classification/validation_script.py @@ -27,13 +27,13 @@ on Imagenette [default: zoo:cv/classificati 
on/resnet_v1-50/pytorch/sparseml/imagenette/ base-none] + --image-size, --image_size INTEGER + integer size to evaluate images at (will be + reshaped to square shape) [default: 224] --batch-size, --batch_size INTEGER Test batch size, must divide the dataset evenly, else last batch will be dropped [default: 1] - --image-size, --image_size INTEGER - integer size to evaluate images at (will be - reshaped to square shape) [default: 224] --num-cores, --num_cores INTEGER Number of CPU cores to run deepsparse with, default is all available @@ -213,11 +213,10 @@ def main( pipeline = Pipeline.create( task="image_classification", model_path=model_path, + engine_type=engine, batch_size=batch_size, num_cores=num_cores, - engine_type=engine, ) - print(f"engine info: {pipeline.engine}") correct = total = 0 progress_bar = tqdm(data_loader) diff --git a/src/deepsparse/v2/utils/__init__.py b/src/deepsparse/legacy/__init__.py similarity index 86% rename from src/deepsparse/v2/utils/__init__.py rename to src/deepsparse/legacy/__init__.py index 75935a9729..0e53b4e85d 100644 --- a/src/deepsparse/v2/utils/__init__.py +++ b/src/deepsparse/legacy/__init__.py @@ -1,5 +1,3 @@ -# flake8: noqa - # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import * -from .state import * -from .types import * +# flake8: noqa -from .data import * # isort:skip +from .base_pipeline import * +from .pipeline import * +from .tasks import * diff --git a/src/deepsparse/base_pipeline.py b/src/deepsparse/legacy/base_pipeline.py similarity index 98% rename from src/deepsparse/base_pipeline.py rename to src/deepsparse/legacy/base_pipeline.py index 156ea38656..c5d006fc80 100644 --- a/src/deepsparse/base_pipeline.py +++ b/src/deepsparse/legacy/base_pipeline.py @@ -19,10 +19,10 @@ from pydantic import BaseModel from deepsparse import Context +from deepsparse.legacy.tasks import SupportedTasks, dynamic_import_task from deepsparse.loggers.base_logger import BaseLogger from deepsparse.loggers.build_logger import logger_from_config from deepsparse.loggers.constants import validate_identifier -from deepsparse.tasks import SupportedTasks, dynamic_import_task __all__ = [ @@ -166,7 +166,7 @@ def create( implementation :return: pipeline object initialized for the given task """ - from deepsparse.pipeline import Bucketable, BucketingPipeline, Pipeline + from deepsparse.legacy.pipeline import Bucketable, BucketingPipeline, Pipeline pipeline_constructor = BasePipeline._get_task_constructor(task) model_path = kwargs.get("model_path", None) @@ -278,7 +278,7 @@ def from_config( logging. 
Default is None :return: loaded Pipeline object from the config """ - from deepsparse.pipeline import PipelineConfig + from deepsparse.legacy.pipeline import PipelineConfig if isinstance(config, Path) or ( isinstance(config, str) and os.path.exists(config) @@ -308,7 +308,7 @@ def to_config(self) -> "PipelineConfig": # noqa: F821 """ :return: PipelineConfig that can be used to reload this object """ - from deepsparse.pipeline import PipelineConfig + from deepsparse.legacy.pipeline import PipelineConfig if not hasattr(self, "task"): raise RuntimeError( diff --git a/src/deepsparse/v2/image_classification/__init__.py b/src/deepsparse/legacy/image_classification/__init__.py similarity index 85% rename from src/deepsparse/v2/image_classification/__init__.py rename to src/deepsparse/legacy/image_classification/__init__.py index 8668227df7..10a3971bf8 100644 --- a/src/deepsparse/v2/image_classification/__init__.py +++ b/src/deepsparse/legacy/image_classification/__init__.py @@ -13,8 +13,5 @@ # limitations under the License. # flake8: noqa -from .postprocess_operator import * -from .preprocess_operator import * - -from .pipeline import * # isort:skip +from .pipelines import * diff --git a/src/deepsparse/image_classification/pipelines.py b/src/deepsparse/legacy/image_classification/pipelines.py similarity index 99% rename from src/deepsparse/image_classification/pipelines.py rename to src/deepsparse/legacy/image_classification/pipelines.py index d55a5d138d..dd6bd7bb86 100644 --- a/src/deepsparse/image_classification/pipelines.py +++ b/src/deepsparse/legacy/image_classification/pipelines.py @@ -31,7 +31,7 @@ ImageClassificationInput, ImageClassificationOutput, ) -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path diff --git a/src/deepsparse/legacy/pipeline.py b/src/deepsparse/legacy/pipeline.py new file mode 100644 index 0000000000..7f38587707 --- /dev/null +++ b/src/deepsparse/legacy/pipeline.py @@ -0,0 +1,1348 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Classes and registry for end to end inference pipelines that wrap an underlying +inference engine and include pre/postprocessing +""" +import os +from abc import ABC, abstractmethod +from concurrent.futures import ThreadPoolExecutor +from functools import partial +from pathlib import Path +from typing import Any, Dict, Generator, List, Optional, Tuple, Type, Union + +import numpy +from pydantic import BaseModel, Field + +from deepsparse import Context, Engine, MultiModelEngine, Scheduler +from deepsparse.benchmark import ORTEngine, TorchScriptEngine +from deepsparse.cpu import cpu_details +from deepsparse.legacy.base_pipeline import ( + _REGISTERED_PIPELINES, + BasePipeline, + SupportedTasks, +) +from deepsparse.loggers.base_logger import BaseLogger +from deepsparse.loggers.constants import MetricCategories, SystemGroups +from deepsparse.utils import ( + InferenceStages, + StagedTimer, + TimerManager, + join_engine_outputs, + split_engine_inputs, +) + + +__all__ = [ + "DEEPSPARSE_ENGINE", + "ORT_ENGINE", + "TORCHSCRIPT_ENGINE", + "SUPPORTED_PIPELINE_ENGINES", + "Pipeline", + "BasePipeline", + "SupportedTasks", + "_REGISTERED_PIPELINES", + "PipelineConfig", + "question_answering_pipeline", + "text_classification_pipeline", + "zero_shot_text_classification_pipeline", + "token_classification_pipeline", + "image_classification_pipeline", + "yolo_pipeline", + "Bucketable", + "BucketingPipeline", + "create_engine", + "TextGeneration", + "CodeGeneration", + "Chat", +] + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" +TORCHSCRIPT_ENGINE = "torchscript" + +SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] + + +class Pipeline(BasePipeline): + """ + Generic Pipeline abstract class meant to wrap inference engine objects to include + data pre/post-processing. Inputs and outputs of pipelines should be serialized + as pydantic Models. See the BasePipeline above for additional parameters provided + during inference. + + Pipelines should not be instantiated by their constructors, but rather the + `Pipeline.create()` method. The task name given to `create` will be used to + load the appropriate pipeline. When creating a Pipeline, the pipeline should + inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, + `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. + + Finally, the class definition should be decorated by the `Pipeline.register` + function. This defines the task name and task aliases for the pipeline and + ensures that it will be accessible by `Pipeline.create`. The implemented + `Pipeline` subclass must be imported at runtime to be accessible. 
+ + Pipeline lifecycle: + - On instantiation + * `onnx_file_path` <- `setup_onnx_file_path` + * `engine` <- `_initialize_engine` + + - on __call__: + * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` + * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` + * `engine_outputs` <- `engine(pre_processed_inputs)` + * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` + + Example use of register: + ```python + @Pipeline.register( + task="example_task", + task_aliases=["example_alias_1", "example_alias_2"], + ) + class PipelineImplementation(Pipeline): + # implementation of Pipeline abstract methods here + ``` + + Example use of pipeline: + ```python + example_pipeline = Pipeline.create( + task="example_task", + model_path="model.onnx", + ) + pipeline_outputs = example_pipeline(pipeline_inputs) + ``` + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. None represents + dynamic batch mode (Pipeline will accept any batch size). Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param num_streams: The max number of requests the model can handle + concurrently. None or 0 implies a scheduler-defined default value; + default None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param context: Optional Context object to use for creating instances of + MultiModelEngine. The Context contains a shared scheduler along with + other runtime information that will be used across instances of the + MultiModelEngine to provide optimal performance when running multiple + models concurrently + :param executor: An optional ThreadPoolExecutor() object, if provided the + pipeline executes inference requests in a non-blocking manner and returns + a Future object, call Future.result() on returned object to get the result. 
+ Can also accept an int number of workers, a ThreadPoolExecutor object is + auto-initialized with the specified integer in that case; None represents + synchronous execution - if running in dynamic batch mode a default + ThreadPoolExecutor with default workers equal to the number of available + cores / 2 + """ + + def __init__( + self, + model_path: str, + engine_type: str = DEEPSPARSE_ENGINE, + batch_size: Optional[int] = 1, + num_cores: int = None, + num_streams: int = None, + scheduler: Scheduler = None, + input_shapes: List[List[int]] = None, + context: Optional[Context] = None, + executor: Optional[Union[ThreadPoolExecutor, int]] = None, + benchmark: bool = False, + _delay_engine_initialize: bool = False, # internal use only + **kwargs, + ): + self._benchmark = benchmark + self._model_path_orig = model_path + self._model_path = model_path + self._engine_type = engine_type + self._batch_size = batch_size + self._timer_manager = TimerManager(enabled=True, multi=benchmark) + self.context = context + super().__init__(**kwargs) + + self.executor, self._num_async_workers = _initialize_executor_and_workers( + batch_size=batch_size, + workers_or_executor=executor, + ) + + if self.context is not None: + num_cores = num_cores or self.context.num_cores + if self.context.num_cores != num_cores: + raise ValueError( + f"num_cores mismatch. Expected {self.context.num_cores} " + f"from passed context, but got {num_cores} while " + f"instantiating Pipeline" + ) + + self._engine_args = dict( + batch_size=self._batch_size or 1, # bs=1 for dynamic batch + num_cores=num_cores, + input_shapes=input_shapes, + ) + if engine_type.lower() == DEEPSPARSE_ENGINE: + self._engine_args["scheduler"] = scheduler + self._engine_args["num_streams"] = num_streams + + self.onnx_file_path = self.setup_onnx_file_path() + + if _delay_engine_initialize: + self.engine = None + else: + self.engine = self._initialize_engine() + self._batch_size = self._batch_size or 1 + + self.log( + identifier=f"{SystemGroups.INFERENCE_DETAILS}/num_cores_total", + value=num_cores, + category=MetricCategories.SYSTEM, + ) + + def __call__(self, *args, **kwargs) -> BaseModel: + with self.timer_manager.new_timer_context() as timer: + if "engine_inputs" in kwargs: + raise ValueError( + "invalid kwarg engine_inputs. engine inputs determined " + f"by {self.__class__.__qualname__}.parse_inputs" + ) + + # ------ PREPROCESSING ------ + timer.start(InferenceStages.PRE_PROCESS) + # parse inputs into input_schema + pipeline_inputs = self.parse_inputs(*args, **kwargs) + self.log( + identifier="pipeline_inputs", + value=pipeline_inputs, + category=MetricCategories.DATA, + ) + + if not isinstance(pipeline_inputs, self.input_schema): + raise RuntimeError( + f"Unable to parse {self.__class__} inputs into a " + f"{self.input_schema} object. 
" + f"Inputs parsed to {type(pipeline_inputs)}" + ) + # batch size of the inputs may be `> self._batch_size` at this point + engine_inputs = self.process_inputs(pipeline_inputs) + if isinstance(engine_inputs, tuple): + engine_inputs, context = engine_inputs + else: + context = {} + + timer.stop(InferenceStages.PRE_PROCESS) + self.log( + identifier="engine_inputs", + value=engine_inputs, + category=MetricCategories.DATA, + ) + + # ------ INFERENCE ------ + # split inputs into batches of size `self._batch_size` + timer.start(InferenceStages.ENGINE_FORWARD) + batches, orig_batch_size = self.split_engine_inputs( + engine_inputs, self._batch_size + ) + + # submit split batches to engine threadpool + engine_forward_with_context = partial(self.engine_forward, context=context) + batch_outputs = list( + self.executor.map(engine_forward_with_context, batches) + ) + + # join together the batches of size `self._batch_size` + engine_outputs = self.join_engine_outputs( + batch_outputs, orig_batch_size, **context + ) + timer.stop(InferenceStages.ENGINE_FORWARD) + + self.log( + identifier=f"{SystemGroups.INFERENCE_DETAILS}/input_batch_size_total", + # to get the batch size of the inputs, we need to look + # to multiply the engine batch size (self._batch_size) + # by the number of batches processed by the engine during + # a single inference call + value=len(batch_outputs) * self._batch_size, + category=MetricCategories.SYSTEM, + ) + self.log( + identifier="engine_outputs", + value=engine_outputs, + category=MetricCategories.DATA, + ) + + # ------ POSTPROCESSING ------ + timer.start(InferenceStages.POST_PROCESS) + pipeline_outputs = self.process_engine_outputs(engine_outputs, **context) + if not isinstance(pipeline_outputs, (self.output_schema, Generator)): + raise ValueError( + f"Outputs of {self.__class__} must be instances of " + f"{self.output_schema} found output of type " + f"{type(pipeline_outputs)}" + ) + timer.stop(InferenceStages.POST_PROCESS) + self.log( + identifier="pipeline_outputs", + value=pipeline_outputs, + category=MetricCategories.DATA, + ) + + self.log_inference_times(timer) + + return pipeline_outputs + + @classmethod + def from_config( + cls, + config: Union["PipelineConfig", str, Path], + context: Optional[Context] = None, + logger: Optional[BaseLogger] = None, + ) -> "Pipeline": + """ + :param config: PipelineConfig object, filepath to a json serialized + PipelineConfig, or raw string of a json serialized PipelineConfig + :param context: Optional Context object to use for creating instances of + MultiModelEngine. The Context contains a shared scheduler along with + other runtime information that will be used across instances of the + MultiModelEngine to provide optimal performance when running + multiple models concurrently + :param logger: An optional DeepSparse Logger object for inference + logging. 
Default is None + :return: loaded Pipeline object from the config + """ + if isinstance(config, Path) or ( + isinstance(config, str) and os.path.exists(config) + ): + if isinstance(config, str): + config = Path(config) + config = PipelineConfig.parse_file(config) + if isinstance(config, str): + config = PipelineConfig.parse_raw(config) + + return cls.create( + task=config.task, + model_path=config.model_path, + engine_type=config.engine_type, + batch_size=config.batch_size, + num_cores=config.num_cores, + scheduler=config.scheduler, + input_shapes=config.input_shapes, + alias=config.alias, + context=context, + logger=logger, + **config.kwargs, + ) + + @abstractmethod + def setup_onnx_file_path(self) -> str: + """ + Performs any setup to unwrap and process the given `model_path` and other + class properties into an inference ready onnx file to be compiled by the + engine of the pipeline + + :return: file path to the ONNX file for the engine to compile + """ + raise NotImplementedError() + + @abstractmethod + def process_inputs( + self, + inputs: BaseModel, + ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: + """ + :param inputs: inputs to the pipeline. Must be the type of the `input_schema` + of this pipeline + :return: inputs of this model processed into a list of numpy arrays that + can be directly passed into the forward pass of the pipeline engine. Can + also include a tuple with engine inputs and special key word arguments + to pass to process_engine_outputs to facilitate information from the raw + inputs to postprocessing that may not be included in the engine inputs + """ + raise NotImplementedError() + + @abstractmethod + def process_engine_outputs( + self, + engine_outputs: List[numpy.ndarray], + **kwargs, + ) -> BaseModel: + """ + :param engine_outputs: list of numpy arrays that are the output of the engine + forward pass + :return: outputs of engine post-processed into an object in the `output_schema` + format of this pipeline + """ + raise NotImplementedError() + + @property + def model_path_orig(self) -> str: + """ + :return: value originally passed to the `model_path` argument to initialize + this Pipeline + """ + return self._model_path_orig + + @property + def model_path(self) -> str: + """ + :return: path on local system to the onnx file of this model or directory + containing a model.onnx file along with supporting files + """ + return self._model_path + + @property + def engine_args(self) -> Dict[str, Any]: + """ + :return: arguments besides onnx filepath used to instantiate engine + """ + return self._engine_args + + @property + def engine_type(self) -> str: + """ + :return: type of inference engine used for model forward pass + """ + return self._engine_type + + @property + def timer_manager(self) -> TimerManager: + return self._timer_manager + + @property + def current_timer(self) -> Optional[StagedTimer]: + """ + :return: current timer for the pipeline, if any + """ + timer = self.timer_manager.current + + if timer is None: + timer = self.timer_manager.latest + + return timer + + @property + def benchmark(self) -> bool: + return self._benchmark + + @benchmark.setter + def benchmark(self, value: bool): + self._benchmark = value + self.timer_manager.multi = value + + def to_config(self) -> "PipelineConfig": + """ + :return: PipelineConfig that can be used to reload this object + """ + + if not hasattr(self, "task"): + raise RuntimeError( + f"{self.__class__} instance has no attribute task. 
Pipeline objects " + "must have a task to be serialized to a config. Pipeline objects " + "must be declared with the Pipeline.register object to be assigned a " + "task" + ) + + # parse any additional properties as kwargs + kwargs = {} + for attr_name, attr in self.__class__.__dict__.items(): + if isinstance(attr, property) and attr_name not in dir(PipelineConfig): + kwargs[attr_name] = getattr(self, attr_name) + + return PipelineConfig( + task=self.task, + model_path=self.model_path_orig, + engine_type=self.engine_type, + batch_size=self._batch_size, + num_cores=self._engine_args.get("num_cores"), + scheduler=self._engine_args.get("scheduler"), + input_shapes=self._engine_args.get("input_shapes"), + alias=self.alias, + kwargs=kwargs, + ) + + def join_engine_outputs( + self, batch_outputs: List[List[numpy.ndarray]], orig_batch_size: int, **kwargs + ) -> List[numpy.ndarray]: + """ + Joins list of engine outputs together into one list. + This is the opposite of `split_engine_inputs` and is meant to be used in tandem. + + :param batch_outputs: list of engine outputs + :param orig_batch_size: original batch size of the inputs + :return: list of engine outputs joined together + """ + return join_engine_outputs(batch_outputs, orig_batch_size) + + def split_engine_inputs( + self, items: List[numpy.ndarray], batch_size: int + ) -> List[List[numpy.ndarray]]: + """ + Splits each item into numpy arrays with the first dimension == `batch_size`. + This is the opposite of `join_engine_outputs` and is meant to be used in tandem. + + :param items: size of each batch to split into + :param batch_size: size of each batch to enforce + + :return: list of batches, where each batch is a list of numpy arrays + """ + return split_engine_inputs(items, batch_size) + + def engine_forward( + self, + engine_inputs: List[numpy.ndarray], + context: Dict = {}, + ) -> List[numpy.ndarray]: + """ + :param engine_inputs: list of numpy inputs to Pipeline engine forward + pass + :param context: optional dictionary to be used during engine execution + :return: result of forward pass to Pipeline engine + """ + return self.engine(engine_inputs) + + def log_inference_times(self, timer: StagedTimer): + """ + logs stage times in the given timer + + :param timer: timer to log + """ + for stage, time in timer.times.items(): + self.log( + identifier=f"{SystemGroups.PREDICTION_LATENCY}/{stage}_seconds", + value=time, + category=MetricCategories.SYSTEM, + ) + + def _initialize_engine( + self, + ) -> Union[Engine, MultiModelEngine, ORTEngine, TorchScriptEngine]: + return create_engine( + self.onnx_file_path, self.engine_type, self._engine_args, self.context + ) + + def _properties_dict(self) -> Dict: + return { + "config": self.to_config(), + "engine": self.engine, + } + + def __repr__(self): + """ + :return: Unambiguous representation of the current pipeline + """ + return "{}({})".format(self.__class__, self._properties_dict()) + + def __str__(self): + """ + :return: Human readable form of the current pipeline + """ + formatted_props = [ + "\t{}: {}".format(key, val) for key, val in self._properties_dict().items() + ] + + return "{}.{}:\n{}".format( + self.__class__.__module__, + self.__class__.__qualname__, + "\n".join(formatted_props), + ) + + +class PipelineConfig(BaseModel): + """ + Configuration for creating a Pipeline object + + Can be used to create a Pipeline from a config object or file with + Pipeline.from_config(), or used as a building block for other configs + such as for deepsparse.server + """ + + task: str = Field( + 
description="name of task to create a pipeline for", + ) + model_path: str = Field( + default=None, + description="path on local system or SparseZoo stub to load the model from", + ) + engine_type: str = Field( + default=DEEPSPARSE_ENGINE, + description=( + "inference engine to use. Currently supported values include " + "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" + ), + ) + batch_size: Optional[int] = Field( + default=1, + description=("static batch size to use for inference. Default is 1"), + ) + num_cores: int = Field( + default=None, + description=( + "number of CPU cores to allocate for inference engine. None" + "specifies all available cores. Default is None" + ), + ) + scheduler: Optional[str] = Field( + default="async", + description=( + "(deepsparse only) kind of scheduler to execute with. Defaults to async" + ), + ) + input_shapes: List[List[int]] = Field( + default=None, + description=( + "list of shapes to set ONNX the inputs to. Pass None to use model as-is. " + "Default is None" + ), + ) + alias: str = Field( + default=None, + description=( + "optional name to give this pipeline instance, useful when inferencing " + "with multiple models. Default is None" + ), + ) + kwargs: Dict[str, Any] = Field( + default={}, + description=( + "Additional arguments for inference with the model that will be passed " + "into the pipeline as kwargs" + ), + ) + + +class BucketingPipeline(object): + """ + A Proxy class that adds Bucketing functionality to Pipelines + + :param pipelines: A list of Pipeline objects/buckets that implement + `Bucketable` contract + """ + + def __init__(self, pipelines: List[Pipeline]): + if not (pipelines and isinstance(pipelines, list)): + raise ValueError( + "Expected a non empty List of pipeline objects but got " f"{pipelines}" + ) + self._pipelines = pipelines + self._pipeline_class = pipelines[0].__class__ + self._validate_pipeline_class() + + def __call__(self, *args, **kwargs): + bucket, parsed_inputs = self._choose_bucket(*args, **kwargs) + return bucket(parsed_inputs) + + def _choose_bucket(self, *args, **kwargs): + parsed_inputs = self._pipelines[-1].parse_inputs(*args, **kwargs) + bucket = self._pipeline_class.route_input_to_bucket( + input_schema=parsed_inputs, + pipelines=self._pipelines, + ) + return bucket, parsed_inputs + + def __getattr__(self, item): + value = getattr(self._pipelines[0].__class__, item) + + if isinstance(value, property): + return getattr(self._pipelines[0], item) + + raise AttributeError( + f"{item} not found in {self.__class__.__name__}, " + f"and is not a property of {self._pipeline_class.__name__}" + ) + + @property + def input_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that inputs to this pipeline must comply to + """ + return self._pipelines[0].input_schema + + @property + def output_schema(self) -> Type[BaseModel]: + """ + :return: pydantic model class that outputs of this pipeline must comply to + """ + return self._pipelines[0].output_schema + + def _validate_pipeline_class(self): + # validate all pipelines belong to the same class + + if not issubclass(self._pipeline_class, Bucketable): + raise ValueError(f"{self._pipeline_class} is not Bucketable") + + is_valid = all( + isinstance(pipeline, self._pipeline_class) for pipeline in self._pipelines + ) + + if not is_valid: + raise ValueError( + "All Pipeline Buckets must belong to the same Pipeline Class" + ) + + +class Bucketable(ABC): + """ + A contract, that ensures implementing Pipeline class can create multiple Pipeline + 
instances and route each input sample to correct instance based off of specific + implementations of abstract methods defined in this contract + """ + + @staticmethod + @abstractmethod + def should_bucket(*args, **kwargs) -> bool: + """ + :returns: True if buckets should be created else False + """ + pass + + @staticmethod + @abstractmethod + def create_pipeline_buckets(*args, **kwargs) -> List[Pipeline]: + """ + :return: Create and return a list of Pipeline objects + representing different buckets + """ + pass + + @staticmethod + @abstractmethod + def route_input_to_bucket( + *args, input_schema: BaseModel, pipelines: List[Pipeline], **kwargs + ) -> Pipeline: + """ + :param input_schema: The schema representing an input to the pipeline + :param pipelines: Different buckets to be used + :return: The correct Pipeline object (or Bucket) to route input to + """ + pass + + +def create_engine( + onnx_file_path: str, + engine_type: str, + engine_args: Dict, + context: Optional[Context] = None, +) -> Union[Engine, MultiModelEngine, ORTEngine]: + """ + Create an inference engine for a given ONNX model + + :param onnx_file_path: path to ONNX model file + :param engine_type: type of engine to create. + :param engine_args: arguments to pass to engine constructor + :param context: context to use for engine + :return: inference engine + """ + engine_type = engine_type.lower() + + if engine_type == DEEPSPARSE_ENGINE: + if context is not None and isinstance(context, Context): + engine_args.pop("num_cores", None) + engine_args.pop("scheduler", None) + engine_args.pop("num_streams", None) + engine_args["context"] = context + return MultiModelEngine( + model=onnx_file_path, + **engine_args, + ) + engine_args.pop("cache_output_bools", None) + return Engine(onnx_file_path, **engine_args) + + if engine_type == ORT_ENGINE: + return ORTEngine(onnx_file_path, **engine_args) + + if engine_type == TORCHSCRIPT_ENGINE: + return TorchScriptEngine(onnx_file_path, **engine_args) + + raise ValueError( + f"Unknown engine_type {engine_type}. 
Supported values include: " + f"{SUPPORTED_PIPELINE_ENGINES}" + ) + + +def _initialize_executor_and_workers( + batch_size: Optional[int], + workers_or_executor: Optional[Union[int, ThreadPoolExecutor]], +) -> Tuple[Optional[ThreadPoolExecutor], int]: + if isinstance(workers_or_executor, ThreadPoolExecutor): + num_async_workers = workers_or_executor._max_workers # noqa + executor = workers_or_executor + elif isinstance(workers_or_executor, int): + num_async_workers = max(1, workers_or_executor) + executor = ThreadPoolExecutor(max_workers=num_async_workers) + elif batch_size is None and workers_or_executor is None: + # default num workers to num available cores / 2 + num_cpu_cores_available = cpu_details()[0] + num_async_workers = max(1, num_cpu_cores_available // 2) + executor = ThreadPoolExecutor(max_workers=num_async_workers) + elif workers_or_executor is not None: + raise ValueError( + "Expected an int or ThreadPoolExecutor to run in async mode" + f" but got {workers_or_executor} of type {type(workers_or_executor)}" + ) + else: + executor = ThreadPoolExecutor(max_workers=1) + num_async_workers = 1 + + if batch_size is None and executor is None: + raise ValueError( + "Must have a ThreadPoolExecutor for running in dynamic batch mode " + f"but got {None}" + ) + + return executor, num_async_workers + + +def text_generation_pipeline( + *args, model: Optional[str] = None, **kwargs +) -> "Pipeline": + """ + :return: text generation pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("text_generation", *args, **kwargs) + + +def code_generation_pipeline( + *args, model: Optional[str] = None, **kwargs +) -> "Pipeline": + """ + :return: code generation pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("code_generation", *args, **kwargs) + + +def chat_pipeline(*args, model: Optional[str] = None, **kwargs) -> "Pipeline": + """ + :return: chat pipeline with the given args and + kwargs passed to Pipeline.create + """ + kwargs = _parse_model_arg(model, **kwargs) + return Pipeline.create("chat", *args, **kwargs) + + +def _parse_model_arg(model: Optional[str], **kwargs) -> dict: + if model is not None: + model_path = kwargs.get("model_path") + if model_path is not None: + raise ValueError( + f"Only one of model and model_path may be supplied, found {model} " + f"and {model_path} respectively" + ) + kwargs["model_path"] = model + return kwargs + + +# aliases for top level import +TextGeneration = text_generation_pipeline +CodeGeneration = code_generation_pipeline +Chat = chat_pipeline + + +def question_answering_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers question_answering pipeline + + example instantiation: + ```python + question_answering = Pipeline.create( + task="question_answering", + model_path="question_answering_model_dir/", + ) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores.
Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param doc_stride: if the context is too long to fit with the question for the + model, it will be split in several chunks with some overlap. This argument + controls the size of that overlap. Currently, only reading the first span + is supported (everything after doc_stride will be truncated). Default + is 128 + :param max_question_len: maximum length of the question after tokenization. + It will be truncated if needed. Default is 64 + :param max_answer_len: maximum length of answer after decoding. Default is 15 + """ + return Pipeline.create("question_answering", *args, **kwargs) + + +def text_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers text classification pipeline + + example instantiation: + ```python + text_classifier = Pipeline.create( + task="text_classification", + model_path="text_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + example batch size 1, single text inputs (ie sentiment analysis): + ```python + sentiment = text_classifier("the food tastes great") + sentiment = text_classifier(["the food tastes great"]) + sentiment = text_classifier([["the food tastes great"]]) + ``` + + example batch size 1, multi text input (ie QQP like tasks): + ```python + prediction = text_classifier([["how is the food?", "what is the food?"]]) + ``` + + example batch size n, single text inputs: + ```python + sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) + sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param return_all_scores: if True, instead of returning the prediction as the + argmax of model class predictions, will return all scores and labels as + a list for each result in the batch. 
Default is False + """ + return Pipeline.create("text_classification", *args, **kwargs) + + +def sentiment_analysis_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers text classification pipeline + + example instantiation: + ```python + text_classifier = Pipeline.create( + task="text_classification", + model_path="text_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + example batch size 1, single text inputs (ie sentiment analysis): + ```python + sentiment = text_classifier("the food tastes great") + sentiment = text_classifier(["the food tastes great"]) + sentiment = text_classifier([["the food tastes great"]]) + ``` + + example batch size 1, multi text input (ie QQP like tasks): + ```python + prediction = text_classifier([["how is the food?", "what is the food?"]]) + ``` + + example batch size n, single text inputs: + ```python + sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) + sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param return_all_scores: if True, instead of returning the prediction as the + argmax of model class predictions, will return all scores and labels as + a list for each result in the batch. Default is False + """ + return Pipeline.create("text_classification", *args, **kwargs) + + +def token_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + transformers token classification pipeline + + example instantiation: + ```python + token_classifier = Pipeline.create( + task="token_classification", + model_path="token_classification_model_dir/", + batch_size=BATCH_SIZE, + ) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. 
Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param aggregation_strategy: how to aggregate tokens in postprocessing. Options + include 'none', 'simple', 'first', 'average', and 'max'. Default is None + :param ignore_labels: list of label names to ignore in output. Default is + ['0'] which ignores the default known class label + """ + return Pipeline.create("token_classification", *args, **kwargs) + + +def image_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + Image classification pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional dict, or json file of class names to use for + mapping class ids to class labels. Default is None + """ + return Pipeline.create("image_classification", *args, **kwargs) + + +def yolo_pipeline(*args, **kwargs) -> "Pipeline": + """ + Image Segmentation YOLO pipeline for DeepSparse + + :param model_path: path on local system or SparseZoo stub to load the model from + :param engine_type: inference engine to use. Currently supported values + include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param class_names: Optional string identifier, dict, or json file of + class names to use for mapping class ids to class labels. Default is + `coco` + """ + return Pipeline.create("yolo", *args, **kwargs) + + +def haystack_pipeline(*args, **kwargs) -> "Pipeline": + """ + Neural Magic pipeline for running Haystack DocumentSearchPipeline. 
+ Supports selected Haystack Nodes as well as Haystack nodes integrated + with the Neural Magic DeepSparse Engine + + example embedding model instantiation: + ```python + haystack_pipeline = Pipeline.create( + task="information_retrieval_haystack", + model_path="masked_language_modeling_model_dir/", + config={ + "document_store": "InMemoryDocumentStore", + "document_store_args": { + "similarity": "cosine", + "use_gpu": False, + }, + "retriever": "DeepSparseEmbeddingRetriever", + "retriever_args": { + "extraction_strategy": "reduce_mean" + } + }, + ) + ``` + + example deepsparse biencoder instantiation + ```python + haystack_pipeline = Pipeline.create( + task="information_retrieval_haystack", + config={ + "document_store": "InMemoryDocumentStore", + "document_store_args": { + "similarity": "cosine", + "use_gpu": False, + }, + "retriever": "DeepSparseDensePassageRetriever", + "retriever_args": { + "query_model_path": "./query_model", + "passage_model_path": "./passage_model" + } + }, + ) + ``` + + writing documents: + ```python + haystack_pipeline.write_documents([ + { + "title": "Claude Shannon", + "content": "Claude Elwood Shannon was an American mathematician, " + "electrical engineer, and cryptographer known as a father of " + "information theory. He was a 21-year-old master's degree student at " + "the Massachusetts Institute of Technology (MIT)." + }, + { + "title": "Vincent van Gogh", + "content": "Van Gogh was born into an upper-middle-class family. " + "As a child he was serious, quiet and thoughtful. He began drawing " + "at an early age and as a young man worked as an art dealer." + }, + { + "title": "Stevie Wonder", + "content": "Stevland Hardaway Morris, known professionally as " + "Stevie Wonder, is an American singer and musician, who is " + "credited as a pioneer and influence by musicians across a range " + "of genres." + } + ]) + ``` + + example queries: + ```python + from deepsparse.transformers.haystack import print_pipeline_documents + pipeline_outputs = haystack_pipeline( + queries="who invented information theory", + params={"Retriever": {"top_k": 4}} + ) + print_pipeline_documents(pipeline_outputs) + + pipeline_outputs = haystack_pipeline( + queries=[ + "famous artists", + "What is Stevie Wonder's real name?" + ], + params={"Retriever": {"top_k": 4}} + ) + print_pipeline_documents(pipeline_outputs) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param docs: list of documents to be written to document_store. 
Can also + be written after instantiation with write_documents method. + Default is None + :param config: dictionary or instance of HaystackPipelineConfig. Used to + specify Haystack node arguments + :param retriever_kwargs: keyword arguments to be passed to retriever. If + the retriever is a deepsparse retriever, then these arguments will also + be passed to the TransformersEmbeddingExtractionPipeline of the retriever + """ + return Pipeline.create("information_retrieval_haystack", *args, **kwargs) + + +def embedding_extraction_pipeline(*args, **kwargs) -> "Pipeline": + """ + embedding extraction pipeline for extracting intermediate layer embeddings + from transformer models + + example instantiation: + ```python + embedding_extraction_pipeline = Pipeline.create( + task="embedding_extraction", + model_path="masked_language_modeling_model_dir/", + ) + results = embedding_extraction_pipeline( + [ + "the warriors have won the nba finals" + "the warriors are the greatest basketball team ever" + ] + ) + emb_1, emb_2 = results.embeddings + # (expect emb_1 and emb_2 to have high cosine similiarity) + ``` + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: static batch size to use for inference. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param emb_extraction_layer: if an int, the transformer layer number from + which the embeddings will be extracted. If a string, the name of last + ONNX node in model to draw embeddings from. If None, leave the model + unchanged. Default is -1 (last transformer layer before prediction head) + :param model_size: size of transformer model (size of hidden layer per token + if the model is cut). Default is 768 + :param extraction_strategy: method of pooling embedding values. Currently + supported values are 'per_token', 'reduce_mean', 'reduce_max' and 'cls_token'. + Default is 'per_token' + :param return_numpy: return embeddings a list of numpy arrays, list of lists + of floats otherwise. Default is True + :param context: context for engine. If None, then the engine will be initialized + with 2 streams to make use of parallel inference of labels. Default is None + """ + return Pipeline.create("embedding_extraction", *args, **kwargs) + + +def zero_shot_text_classification_pipeline(*args, **kwargs) -> "Pipeline": + """ + Transformers zero shot text classification pipeline. This pipeline allows for + text classification using models which were trained on datasets not originally + meant for this task. 
+ + This class upon construction returns an instance of a child Pipeline which + inherits from ZeroShotTextClassificationPipelineBase. Which type of Pipeline + is returned depends on the value of the passed model_scheme argument. + + example dynamic labels: + ```python + zero_shot_text_classifier = Pipeline.create( + task="zero_shot_text_classification", + model_scheme="mnli", + model_config={"hypothesis_template": "This text is related to {}"}, + model_path="mnli_model_dir/", + ) + + sequence_to_classify = "Who are you voting for in 2020?" + candidate_labels = ["Europe", "public health", "politics"] + zero_shot_text_classifier(sequences=sequence_to_classify, labels=candidate_labels) + >>> ZeroShotTextClassificationOutput( + sequences='Who are you voting for in 2020?', + labels=['politics', 'public health', 'Europe'], + scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) + ``` + + example static labels: + ```python + zero_shot_text_classifier = Pipeline.create( + task="zero_shot_text_classification", + model_scheme="mnli", + model_config={"hypothesis_template": "This text is related to {}"}, + model_path="mnli_model_dir/", + labels=["politics", "Europe", "public health"] + ) + + sequence_to_classify = "Who are you voting for in 2020?" + zero_shot_text_classifier(sequences=sequence_to_classify) + >>> ZeroShotTextClassificationOutput( + sequences='Who are you voting for in 2020?', + labels=['politics', 'public health', 'Europe'], + scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) + ``` + + Note that labels must either be provided during pipeline instantiation via + the constructor, at inference time, but not both. + + Note that if a hypothesis_template is provided at inference time, then it + will override the value provided during model instantiation + + :param model_path: sparsezoo stub to a transformers model or (preferred) a + directory containing a model.onnx, tokenizer config, and model config + :param engine_type: inference engine to use. Currently supported values include + 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' + :param batch_size: batch size must divide sequences * labels, regardless of + whether using dynamic or static labels. Default is 1 + :param num_cores: number of CPU cores to allocate for inference engine. None + specifies all available cores. Default is None + :param scheduler: (deepsparse only) kind of scheduler to execute with. + Pass None for the default + :param input_shapes: list of shapes to set ONNX the inputs to. Pass None + to use model as-is. Default is None + :param alias: optional name to give this pipeline instance, useful when + inferencing with multiple models. Default is None + :param sequence_length: sequence length to compile model and tokenizer for. + If a list of lengths is provided, then for each length, a model and + tokenizer will be compiled capable of handling that sequence length + (also known as a bucket). Default is 128 + :param default_model_name: huggingface transformers model name to use to + load a tokenizer and model config when none are provided in the `model_path`. + Default is "bert-base-uncased" + :param model_scheme: training scheme used to train the model used for zero shot. + Default is "mnli" + :param model_config: config object specific to the model_scheme of this model + or a dict of config keyword arguments + :param labels: static list of labels to perform text classification with. Can + also be provided at inference time + :param context: context for engine. 
If None, then the engine will be initialized + with 2 streams to make use of parallel inference of labels + """ + return Pipeline.create("zero_shot_text_classification", *args, **kwargs) diff --git a/src/deepsparse/legacy/tasks.py b/src/deepsparse/legacy/tasks.py new file mode 100644 index 0000000000..6b23c7d072 --- /dev/null +++ b/src/deepsparse/legacy/tasks.py @@ -0,0 +1,428 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Classes and implementations for supported tasks in the DeepSparse pipeline and system +""" + +import importlib +import logging +import os +import sys +from collections import namedtuple +from typing import Iterable, List, Optional, Tuple + + +_LOGGER = logging.getLogger(__name__) + +__all__ = ["SupportedTasks", "AliasedTask"] + + +class AliasedTask: + """ + A task that can have multiple aliases to match to. + For example, question_answering which can alias to qa as well + + :param name: the name of the task such as question_answering or text_classification + :param aliases: the aliases the task can go by in addition to the name such as + qa, glue, sentiment_analysis, etc + """ + + def __init__(self, name: str, aliases: List[str]): + self._name = name + self._aliases = aliases + + @property + def name(self) -> str: + """ + :return: the name of the task such as question_answering + """ + return self._name + + @property + def aliases(self) -> List[str]: + """ + :return: the aliases the task can go by such as qa, glue, sentiment_analysis + """ + return self._aliases + + def matches(self, task: str) -> bool: + """ + :param task: the name of the task to check whether the given instance matches. + Checks the current name as well as any aliases. + Everything is compared at lower case and "-" and whitespace + are replaced with "_". 
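As a quick illustration of the normalization described just above (a hypothetical check, assuming the patched package that adds deepsparse.legacy.tasks is installed):
```python
# hyphens, whitespace, and capitalization are normalized before comparing
# against the task name and its aliases
from deepsparse.legacy.tasks import AliasedTask

qa_task = AliasedTask("question_answering", ["qa"])

assert qa_task.matches("question_answering")   # exact name
assert qa_task.matches("Question-Answering")   # case and "-" normalized
assert qa_task.matches("question answering")   # whitespace becomes "_"
assert qa_task.matches("QA")                   # alias match
assert not qa_task.matches("text_classification")
```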
+ :return: True if task does match the current instance, False otherwise + """ + task = task.lower().replace("-", "_") + + # replace whitespace with "_" + task = "_".join(task.split()) + + return task == self.name or task in self.aliases + + +class SupportedTasks: + """ + The supported tasks in the DeepSparse pipeline and system + """ + + nlp = namedtuple( + "nlp", + [ + "question_answering", + "text_classification", + "token_classification", + "zero_shot_text_classification", + "transformers_embedding_extraction", + ], + )( + question_answering=AliasedTask("question_answering", ["qa"]), + text_classification=AliasedTask( + "text_classification", ["glue", "sentiment_analysis"] + ), + token_classification=AliasedTask("token_classification", ["ner"]), + zero_shot_text_classification=AliasedTask("zero_shot_text_classification", []), + transformers_embedding_extraction=AliasedTask( + "transformers_embedding_extraction", [] + ), + ) + + chat = namedtuple("chat", ["chatbot", "chat"])( + chatbot=AliasedTask("chatbot", []), chat=AliasedTask("chat", []) + ) + text_generation = namedtuple( + "text_generation", ["text_generation", "opt", "bloom"] + )( + text_generation=AliasedTask("text_generation", []), + opt=AliasedTask("opt", []), + bloom=AliasedTask("bloom", []), + ) + code_generation = namedtuple("code_generation", ["code_generation", "codegen"])( + code_generation=AliasedTask("code_generation", []), + codegen=AliasedTask("codegen", []), + ) + + image_classification = namedtuple("image_classification", ["image_classification"])( + image_classification=AliasedTask( + "image_classification", + ["image_classification"], + ), + ) + + yolo = namedtuple("yolo", ["yolo"])( + yolo=AliasedTask("yolo", ["yolo"]), + ) + yolov8 = namedtuple("yolov8", ["yolov8"])( + yolov8=AliasedTask("yolov8", ["yolov8"]), + ) + yolact = namedtuple("yolact", ["yolact"])( + yolact=AliasedTask("yolact", ["yolact"]), + ) + + haystack = namedtuple("haystack", ["information_retrieval_haystack"])( + information_retrieval_haystack=AliasedTask( + "information_retrieval_haystack", ["haystack"] + ), + ) + embedding_extraction = namedtuple("embedding_extraction", ["embedding_extraction"])( + embedding_extraction=AliasedTask( + "embedding_extraction", ["embedding_extraction"] + ), + ) + open_pif_paf = namedtuple("open_pif_paf", ["open_pif_paf"])( + open_pif_paf=AliasedTask("open_pif_paf", ["open_pif_paf"]), + ) + + all_task_categories = [ + nlp, + image_classification, + yolo, + yolov8, + yolact, + haystack, + embedding_extraction, + open_pif_paf, + text_generation, + chat, + code_generation, + ] + + @classmethod + def check_register_task( + cls, task: str, extra_tasks: Optional[Iterable[str]] = None + ): + """ + :param task: task name to validate and import dependencies for + :param extra_tasks: valid task names that are not included in supported tasks. + i.e. 
tasks registered to Pipeline at runtime + """ + if task == "custom": + # custom task, register the CustomPipeline + import deepsparse.pipelines.custom_pipeline # noqa: F401 + + elif cls.is_text_generation(task): + # noqa: F401 + import deepsparse.legacy.transformers.pipelines.text_generation + + elif cls.is_chat(task): + import deepsparse.transformers.pipelines.chat # noqa: F401 + + elif cls.is_code_generation(task): + import deepsparse.transformers.pipelines.code_generation # noqa: F401 + + elif cls.is_nlp(task): + # trigger transformers pipelines to register with Pipeline.register + import deepsparse.transformers.pipelines # noqa: F401 + + elif cls.is_image_classification(task): + # trigger image classification pipelines to + # register with Pipeline.register + import deepsparse.legacy.image_classification.pipelines # noqa: F401 + + elif cls.is_yolact(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolact.pipelines # noqa: F401 + + elif cls.is_yolo(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolo.pipelines # noqa: F401 + + elif cls.is_yolov8(task): + # trigger yolo pipelines to register with Pipeline.register + import deepsparse.yolov8.pipelines # noqa: F401 + + elif cls.is_haystack(task): + # trigger haystack pipeline as well as transformers pipelines to + # register with Pipeline.register + import deepsparse.transformers.haystack # noqa: F401 + + elif cls.is_embedding_extraction(task): + # trigger embedding_extraction pipelines to register with + # Pipeline.register + import deepsparse.pipelines.embedding_extraction # noqa :F401 + + elif cls.is_open_pif_paf(task): + # trigger embedding_extraction pipelines to register with + # Pipeline.register + import deepsparse.open_pif_paf.pipelines # noqa :F401 + + all_tasks = set(cls.task_names() + (list(extra_tasks or []))) + if task not in all_tasks: + raise ValueError( + f"Unknown Pipeline task {task}. 
Currently supported tasks are " + f"{list(all_tasks)}" + ) + + @classmethod + def is_chat(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a chat task + :return: True if it is a chat task, False otherwise + """ + return any(chat_task.matches(task) for chat_task in cls.chat) + + @classmethod + def is_text_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + text_generation_task.matches(task) + for text_generation_task in cls.text_generation + ) + + @classmethod + def is_code_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + code_generation_task.matches(task) + for code_generation_task in cls.code_generation + ) + + @classmethod + def is_nlp(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an nlp task + such as question_answering + :return: True if it is an nlp task, False otherwise + """ + return any([nlp_task.matches(task) for nlp_task in cls.nlp]) + + @classmethod + def is_cv(cls, task: str) -> bool: + return ( + cls.is_yolo(task) + or cls.is_yolov8(task) + or cls.is_yolact(task) + or cls.is_image_classification(task) + or cls.is_open_pif_paf(task) + ) + + @classmethod + def is_image_classification(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + classification task + :return: True if it is an image classification task, False otherwise + """ + return any([ic_task.matches(task) for ic_task in cls.image_classification]) + + @classmethod + def is_yolo(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLO + :return: True if it is an segmentation task using YOLO, False otherwise + """ + return any([yolo_task.matches(task) for yolo_task in cls.yolo]) + + @classmethod + def is_yolov8(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLOv8 + :return: True if it is an segmentation task using YOLOv8, False otherwise + """ + return any([yolov8_task.matches(task) for yolov8_task in cls.yolov8]) + + @classmethod + def is_yolact(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is an image + segmentation task using YOLO + :return: True if it is an segmentation task using YOLO, False otherwise + """ + return any([yolact_task.matches(task) for yolact_task in cls.yolact]) + + @classmethod + def is_haystack(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a haystack task + :return: True if it is a haystack task, False otherwise + """ + return any([haystack_task.matches(task) for haystack_task in cls.haystack]) + + @classmethod + def is_embedding_extraction(cls, task): + """ + :param task: the name of the task to check whether it is an + embedding_extraction task + :return: True if it is an embedding_extraction task, False otherwise + """ + return any( + embedding_extraction_task.matches(task) + for embedding_extraction_task in cls.embedding_extraction + ) + + @classmethod + def is_open_pif_paf(cls, task): + """ + :param task: the name of the task to check whether it is an + embedding_extraction task + 
:return: True if it is an open_pif_paf task, False otherwise + """ + return any( + open_pif_paf_task.matches(task) for open_pif_paf_task in cls.open_pif_paf + ) + + @classmethod + def task_names(cls): + task_names = ["custom"] + for task_category in cls.all_task_categories: + for task in task_category: + unique_aliases = ( + alias for alias in task._aliases if alias != task._name + ) + task_names += (task._name, *unique_aliases) + return task_names + + +def dynamic_import_task(module_or_path: str) -> str: + """ + Dynamically imports `module` with importlib, and returns the `TASK` + attribute on the module (something like `importlib.import_module(module).TASK`). + + Example contents of `module`: + ```python + from deepsparse.pipeline import Pipeline + from deepsparse.transformers.pipelines.question_answering import ( + QuestionAnsweringPipeline, + ) + + TASK = "my_qa_task" + Pipeline.register(TASK)(QuestionAnsweringPipeline) + ``` + + NOTE: this modifies `sys.path`. + + :raises FileNotFoundError: if path does not exist + :raises RuntimeError: if the imported module does not contain `TASK` + :raises RuntimeError: if the module doesn't register the task + :return: The task from the imported module. + """ + parent_dir, module_name = _split_dir_and_name(module_or_path) + if not os.path.exists(os.path.join(parent_dir, module_name + ".py")): + raise FileNotFoundError( + f"Unable to find file for {module_or_path}. " + f"Looked for {module_name}.py under {parent_dir if parent_dir else '.'}" + ) + + # add parent_dir to sys.path so we can import the file as a module + sys.path.append(os.curdir) + if parent_dir: + _LOGGER.info(f"Adding {parent_dir} to sys.path") + sys.path.append(parent_dir) + + # do the import + _LOGGER.info(f"Importing '{module_name}'") + module_or_path = importlib.import_module(module_name) + + if not hasattr(module_or_path, "TASK"): + raise RuntimeError( + "When using --task import:, " + "module must set the `TASK` attribute." + ) + + task = getattr(module_or_path, "TASK") + _LOGGER.info(f"Using task={repr(task)}") + + return task + + +def _split_dir_and_name(module_or_path: str) -> Tuple[str, str]: + """ + Examples: + - `a` -> `("", "a")` + - `a.b` -> `("a", "b")` + - `a.b.c` -> `("a/b", "c")` + + :return: module split into directory & name + """ + if module_or_path.endswith(".py"): + # assume path + split_char = os.sep + module_or_path = module_or_path.replace(".py", "") + else: + # assume module + split_char = "." + *dirs, module_name = module_or_path.split(split_char) + parent_dir = os.sep if dirs == [""] else os.sep.join(dirs) + return parent_dir, module_name diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/__init__.py b/src/deepsparse/legacy/transformers/__init__.py similarity index 100% rename from tests/deepsparse/transformers/pipelines/integration_tests/__init__.py rename to src/deepsparse/legacy/transformers/__init__.py diff --git a/src/deepsparse/v2/__init__.py b/src/deepsparse/legacy/transformers/pipelines/__init__.py similarity index 82% rename from src/deepsparse/v2/__init__.py rename to src/deepsparse/legacy/transformers/pipelines/__init__.py index 5fd33a9503..a1657d1025 100644 --- a/src/deepsparse/v2/__init__.py +++ b/src/deepsparse/legacy/transformers/pipelines/__init__.py @@ -1,5 +1,3 @@ -# flake8: noqa - # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .operators import * -from .pipeline import * -from .routers import * -from .schedulers import * -from .task import * -from .utils import * +# flake8: noqa + +from .text_generation import * diff --git a/src/deepsparse/transformers/pipelines/text_generation.py b/src/deepsparse/legacy/transformers/pipelines/text_generation.py similarity index 87% rename from src/deepsparse/transformers/pipelines/text_generation.py rename to src/deepsparse/legacy/transformers/pipelines/text_generation.py index 20d08a5f3b..cdd429300c 100644 --- a/src/deepsparse/transformers/pipelines/text_generation.py +++ b/src/deepsparse/legacy/transformers/pipelines/text_generation.py @@ -18,29 +18,24 @@ import os import pathlib import warnings -from enum import Enum -from typing import ( - Any, - Callable, - Dict, - Generator, - List, - Optional, - Sequence, - Tuple, - Type, - Union, -) +from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Type, Union import numpy import onnx -from pydantic import BaseModel, Field +from pydantic import BaseModel from transformers import GenerationConfig -from deepsparse import Pipeline -from deepsparse.pipeline import DEEPSPARSE_ENGINE +from deepsparse.legacy import Pipeline +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE from deepsparse.transformers.engines import NLDecoderEngine from deepsparse.transformers.pipelines import TransformersPipeline +from deepsparse.transformers.schemas.text_generation_schemas import ( + FinishReason, + GeneratedText, + GenerationDefaults, + TextGenerationInput, + TextGenerationOutput, +) from deepsparse.transformers.utils import DecoderKVCache from deepsparse.transformers.utils.helpers import ( check_and_return_generation_config, @@ -63,152 +58,6 @@ __all__ = ["TextGenerationPipeline"] -# Based off of https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig # noqa E501 -class GenerationDefaults: - # Parameters that control the length of the output - max_length = None - max_new_tokens = 100 - # Parameters that control the generation strategy used - do_sample = False - # Parameters for manipulation of the model output logits - temperature = 1.0 - top_k = 50 - top_p = 1.0 - repetition_penalty = 1.0 - # Parameters that define the outputs - num_return_sequences = 1 - output_scores = False - - -class FinishReason(Enum): - STOP = "stop" - LENGTH = "length" - TIME = "time" - CALLBACK = "callback" - CAPACITY = "capacity" - MAX_NEW_TOKENS = "max_new_tokens" - - -class TextGenerationInput(BaseModel): - class Config: - arbitrary_types_allowed = True - - sequences: Union[str, List[str]] = Field( - alias="prompt", - description="The input sequences to generate the text from.", - ) - return_input_tokens: bool = Field( - default=False, - description="A flag that indicates whether to return " "the input_tokens. ", - ) - include_prompt_logits: bool = Field( - default=False, - description="A flag that indicates whether to return " - "the logits for the prompt. If set, prompt_logits are " - "`prepended` to the logits for the generated text sequence." 
- "Note: This flag is only applicable when output_scores " - "is `True`.", - ) - fixed_sequences_length: bool = Field( - default=False, - description="A flag that indicates whether to modify " - "(pad or truncate) each input text sequence, so that " - "its tokenized length is equal to `sequence_length` " - "of tokens. Useful, when a batch of predictions needs " - "to have consistent length so one " - "can compute metric in a batched fashion. ", - ) - streaming: bool = Field( - default=False, - description="Whether to stream the results back as they are generated. If " - "True, then the results are returned as a generator object which yields " - "the results as they are generated. If False, then the results are returned " - "as a list after it has completed.", - ) - callback: Optional[Callable[[Any], Union[bool, Any]]] = Field( - default=None, - description="Callable that will be invoked " - "on each generated token. If the callable returns " - "`False`, the generation will stop. Default is `None`.", - ) - stop: Union[None, str, Sequence[str]] = Field( - default=None, - description="A string or a list of strings that will be used as" - " stop tokens. (token generation will stop when any of the stop" - " tokens is generated). Set to `None` to ignore this parameter." - " Default is `None`.", - ) - - presence_penalty: Optional[float] = Field( - default=0.0, - description="Penalty applied for generating new token. Any existing" - " token results in the subtraction of its corresponding logit value." - " Default set to 0.0", - ) - - generation_config: Union[None, str, pathlib.Path, Dict, GenerationConfig] = Field( - default=None, - description="GenerationConfig file consisting of parameters used to control " - "sequences generated for each prompt. The current supported parameters are: " - "max_length, max_new_tokens, num_return_sequences, output_scores, top_p, " - "top_k, repetition_penalty, do_sample, temperature. If None is provided, " - "deepsparse defaults will be used. For all other input types, HuggingFace " - "defaults for GenerationConfig will be used. ", - ) - - generation_kwargs: Optional[Dict] = Field( - default=None, - description="Any arguments to override generation_config arguments. Refer to " - "the generation_config argument for a full list of supported variables.", - ) - - -class GeneratedText(BaseModel): - text: str = Field( - description="The generated sequence for a given prompt. If " - "streaming is enabled, this will be the next generated token." - ) - score: Optional[Any] = Field( - default=None, - description="The score for the generated token or sequence. " - "The scores have the shape [sequence_length, vocab_size]", - ) - finished: bool = Field(description="Whether generation has stopped.") - finished_reason: Optional[str] = Field( - default=None, - description="The reason for generation to stop. " - "Defined by FinishReason. One of stop, length, or time.", - ) - - -# TODO: Pydantic aliases allow assignment but not reference. Still need to update. -class TextGenerationOutput(BaseModel): - created: datetime.datetime = Field(description="Time of inference creation.") - prompts: Union[str, List[str]] = Field( - description="Prompts used for the sequence generation. For multiple input " - "prompts, a list of prompts is returned" - ) - generations: Union[List[GeneratedText], List[List[GeneratedText]]] = Field( - description="For a single prompt, a single list of GeneratedText is returned. 
" - "If multiple prompts are given, a list of GeneratedText is returned for each " - "prompt provided. If streamng is enabled, the next generated token is returned." - "Otherwise, the full generated sequence is returned." - ) - input_tokens: Optional[ - Any - ] = Field( # dictionary mapping "token_ids" and "attention_mask" to numpy arrays - default=None, - description="The output of the tokenizer." - "Dictionary containing token_ids and attention_mask, " - "both mapping to arrays of size " - "[batch_size, sequence_length]", - ) - - class Config: - arbitrary_types_allowed = True - extra = "allow" - - @Pipeline.register( task="text_generation", task_aliases=["opt", "bloom"], diff --git a/src/deepsparse/open_pif_paf/pipelines.py b/src/deepsparse/open_pif_paf/pipelines.py index f3a015ace2..995ca68b33 100644 --- a/src/deepsparse/open_pif_paf/pipelines.py +++ b/src/deepsparse/open_pif_paf/pipelines.py @@ -22,12 +22,12 @@ import cv2 import torch +from deepsparse.legacy.pipeline import Pipeline from deepsparse.open_pif_paf.schemas import ( OpenPifPafFields, OpenPifPafInput, OpenPifPafOutput, ) -from deepsparse.pipeline import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolact.utils import preprocess_array from openpifpaf import decoder, network diff --git a/src/deepsparse/v2/operators/__init__.py b/src/deepsparse/operators/__init__.py similarity index 100% rename from src/deepsparse/v2/operators/__init__.py rename to src/deepsparse/operators/__init__.py diff --git a/src/deepsparse/v2/operators/engine_operator.py b/src/deepsparse/operators/engine_operator.py similarity index 95% rename from src/deepsparse/v2/operators/engine_operator.py rename to src/deepsparse/operators/engine_operator.py index 630de2d5bd..f39c55eab9 100644 --- a/src/deepsparse/v2/operators/engine_operator.py +++ b/src/deepsparse/operators/engine_operator.py @@ -20,16 +20,24 @@ from deepsparse import Context as EngineContext from deepsparse import Engine, MultiModelEngine, Scheduler from deepsparse.benchmark import ORTEngine +from deepsparse.operators import Operator from deepsparse.utils import join_engine_outputs, model_to_path, split_engine_inputs -from deepsparse.v2.operators import Operator DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" +TORCHSCRIPT_ENGINE = "torchscript" SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] -__all__ = ["EngineOperator", "EngineOperatorInputs", "EngineOperatorOutputs"] +__all__ = [ + "DEEPSPARSE_ENGINE", + "ORT_ENGINE", + "TORCHSCRIPT_ENGINE", + "EngineOperator", + "EngineOperatorInputs", + "EngineOperatorOutputs", +] class EngineOperatorInputs(BaseModel): @@ -86,6 +94,7 @@ class EngineOperator(Operator): def __init__( self, model_path: str, + batch_size: int = 1, engine_type: str = DEEPSPARSE_ENGINE, num_cores: int = None, num_streams: int = None, @@ -96,7 +105,7 @@ def __init__( ): self.model_path = model_to_path(model_path) self.engine_context = engine_context - self._batch_size = 1 + self._batch_size = batch_size if self.engine_context is not None: num_cores = num_cores or self.engine_context.num_cores diff --git a/src/deepsparse/v2/operators/operator.py b/src/deepsparse/operators/operator.py similarity index 93% rename from src/deepsparse/v2/operators/operator.py rename to src/deepsparse/operators/operator.py index e775056f8f..3fb9336c5c 100644 --- a/src/deepsparse/v2/operators/operator.py +++ b/src/deepsparse/operators/operator.py @@ -17,8 +17,8 @@ from pydantic import BaseModel -from deepsparse.v2.operators.registry import OperatorRegistry 
-from deepsparse.v2.utils import InferenceState +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.utils import InferenceState __all__ = ["Operator"] @@ -101,8 +101,9 @@ def __call__( return self.output_schema(**run_output) return run_output - @staticmethod + @classmethod def create( + cls, task: str, **kwargs, ) -> "Operator": @@ -112,7 +113,10 @@ def create( implementation :return: operator object initialized for the given task """ - operator_constructor = OperatorRegistry.get_task_constructor(task) + try: + operator_constructor = OperatorRegistry.get_task_constructor(task) + except Exception: + raise return operator_constructor(**kwargs) @abstractmethod diff --git a/src/deepsparse/v2/operators/registry.py b/src/deepsparse/operators/registry.py similarity index 89% rename from src/deepsparse/v2/operators/registry.py rename to src/deepsparse/operators/registry.py index 1b83b20728..484cff75a2 100644 --- a/src/deepsparse/v2/operators/registry.py +++ b/src/deepsparse/operators/registry.py @@ -14,7 +14,7 @@ from typing import Type -from deepsparse.v2.task import SupportedTasks, dynamic_import_task +from deepsparse.tasks import SupportedTasks, dynamic_import_task from sparsezoo.utils.registry import ( RegistryMixin, get_from_registry, @@ -34,7 +34,7 @@ class OperatorRegistry(RegistryMixin): @classmethod def register_value(cls, operator, name): - from deepsparse.v2.operators import Operator + from deepsparse.operators import Operator if not isinstance(name, list): name = [name] @@ -59,7 +59,7 @@ def get_task_constructor(cls, task: str) -> Type["Operator"]: # noqa: F821 :return: The class registered to `task` :raises ValueError: if `task` was not registered via `OperatorRegistry.register` """ - from deepsparse.v2.operators import Operator + from deepsparse.operators import Operator if task.startswith("import:"): # dynamically import the task from a file @@ -72,5 +72,8 @@ def get_task_constructor(cls, task: str) -> Type["Operator"]: # noqa: F821 tasks = registered_names(Operator) # step needed to import relevant files required to load the operator - SupportedTasks.check_register_task(task, tasks) + try: + SupportedTasks.check_register_task(task, tasks) + except Exception: + raise return get_from_registry(Operator, task) diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py index 90f23654ba..671750e23e 100644 --- a/src/deepsparse/pipeline.py +++ b/src/deepsparse/pipeline.py @@ -12,1333 +12,372 @@ # See the License for the specific language governing permissions and # limitations under the License. 
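The registry changes above boil down to a register-by-name / look-up-by-name pattern keyed on normalized task strings. A minimal, self-contained sketch of that pattern (the names here are illustrative; this does not reproduce the sparsezoo RegistryMixin API):
```python
from typing import Callable, Dict, List, Union

_OPERATOR_REGISTRY: Dict[str, type] = {}


def register(names: Union[str, List[str]]) -> Callable[[type], type]:
    """Decorator that records a class under one or more task names."""
    names = [names] if isinstance(names, str) else names

    def _decorator(cls: type) -> type:
        for name in names:
            # normalize the same way task strings are normalized elsewhere
            _OPERATOR_REGISTRY[name.lower().replace("-", "_")] = cls
        return cls

    return _decorator


def get_task_constructor(task: str) -> type:
    """Look up the class registered for `task`, raising if it is unknown."""
    task = task.lower().replace("-", "_")
    if task not in _OPERATOR_REGISTRY:
        raise ValueError(
            f"Unknown task {task}. Registered tasks: {list(_OPERATOR_REGISTRY)}"
        )
    return _OPERATOR_REGISTRY[task]


@register(["image_classification", "ic"])
class ImageClassificationOperator:
    pass


assert get_task_constructor("Image-Classification") is ImageClassificationOperator
```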
-""" -Classes and registry for end to end inference pipelines that wrap an underlying -inference engine and include pre/postprocessing -""" -import os -from abc import ABC, abstractmethod -from concurrent.futures import ThreadPoolExecutor -from functools import partial -from pathlib import Path -from typing import Any, Dict, Generator, List, Optional, Tuple, Type, Union - -import numpy -from pydantic import BaseModel, Field - -from deepsparse import Context, Engine, MultiModelEngine, Scheduler -from deepsparse.base_pipeline import _REGISTERED_PIPELINES, BasePipeline, SupportedTasks -from deepsparse.benchmark import ORTEngine, TorchScriptEngine -from deepsparse.cpu import cpu_details -from deepsparse.loggers.base_logger import BaseLogger -from deepsparse.loggers.constants import MetricCategories, SystemGroups -from deepsparse.utils import ( - InferenceStages, - StagedTimer, - TimerManager, - join_engine_outputs, - split_engine_inputs, +import asyncio +import copy +from typing import Any, Dict, List, Optional, Union + +from deepsparse.operators import EngineOperator, Operator +from deepsparse.routers import Router +from deepsparse.schedulers import ( + ContinuousBatchingScheduler, + OperatorScheduler, + SchedulerGroup, ) +from deepsparse.utils import InferenceState, PipelineState +from deepsparse.utils.helpers import run_func +from deepsparse.utils.subgraph import SubGraph -__all__ = [ - "DEEPSPARSE_ENGINE", - "ORT_ENGINE", - "TORCHSCRIPT_ENGINE", - "SUPPORTED_PIPELINE_ENGINES", - "Pipeline", - "BasePipeline", - "SupportedTasks", - "_REGISTERED_PIPELINES", - "PipelineConfig", - "question_answering_pipeline", - "text_classification_pipeline", - "zero_shot_text_classification_pipeline", - "token_classification_pipeline", - "image_classification_pipeline", - "yolo_pipeline", - "Bucketable", - "BucketingPipeline", - "create_engine", - "TextGeneration", - "CodeGeneration", - "Chat", -] - -DEEPSPARSE_ENGINE = "deepsparse" -ORT_ENGINE = "onnxruntime" -TORCHSCRIPT_ENGINE = "torchscript" - -SUPPORTED_PIPELINE_ENGINES = [DEEPSPARSE_ENGINE, ORT_ENGINE] +__all__ = ["Pipeline"] -class Pipeline(BasePipeline): +class Pipeline(Operator): """ - Generic Pipeline abstract class meant to wrap inference engine objects to include - data pre/post-processing. Inputs and outputs of pipelines should be serialized - as pydantic Models. See the BasePipeline above for additional parameters provided - during inference. - - Pipelines should not be instantiated by their constructors, but rather the - `Pipeline.create()` method. The task name given to `create` will be used to - load the appropriate pipeline. When creating a Pipeline, the pipeline should - inherit from `Pipeline` and implement the `setup_onnx_file_path`, `process_inputs`, - `process_engine_outputs`, `input_schema`, and `output_schema` abstract methods. - - Finally, the class definition should be decorated by the `Pipeline.register` - function. This defines the task name and task aliases for the pipeline and - ensures that it will be accessible by `Pipeline.create`. The implemented - `Pipeline` subclass must be imported at runtime to be accessible. 
- - Pipeline lifecycle: - - On instantiation - * `onnx_file_path` <- `setup_onnx_file_path` - * `engine` <- `_initialize_engine` - - - on __call__: - * `parsed_inputs: input_schema` <- `parse_inputs(*args, **kwargs)` - * `pre_processed_inputs` <- `process_inputs(parsed_inputs)` - * `engine_outputs` <- `engine(pre_processed_inputs)` - * `outputs: output_schema` <- `process_engine_outputs(engine_outputs)` + Pipeline accepts a series of operators, schedulers, and a router. Calling a pipeline + will use the router to run through all the defined operators. The operators should + be implemented using the Operator class and each implemented operator should be + responsible for a functional component of the pipelines. The flow of inputs/outputs + between the operators and the steps in the pipeline should be defined by the router, + (based off of the Router class), which dicates the next operator in the pipeline. + Execution of the operators will be handled by the provided schedulers. - Example use of register: - ```python - @Pipeline.register( - task="example_task", - task_aliases=["example_alias_1", "example_alias_2"], - ) - class PipelineImplementation(Pipeline): - # implementation of Pipeline abstract methods here - ``` + :param ops: Operators to run within the pipeline. Can either be a list of operators + or dictionary of operators. + :param router: A Router which dictates the next operator to call. + :param schedulers: A list of schedulers to run operators. + :param pipeline_state: pipeline_state created during pipeline initialization - Example use of pipeline: - ```python - example_pipeline = Pipeline.create( - task="example_task", - model_path="model.onnx", - ) - pipeline_outputs = example_pipeline(pipeline_inputs) - ``` - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. None represents - dynamic batch mode (Pipeline will accept any batch size). Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param num_streams: The max number of requests the model can handle - concurrently. None or 0 implies a scheduler-defined default value; - default None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param context: Optional Context object to use for creating instances of - MultiModelEngine. The Context contains a shared scheduler along with - other runtime information that will be used across instances of the - MultiModelEngine to provide optimal performance when running multiple - models concurrently - :param executor: An optional ThreadPoolExecutor() object, if provided the - pipeline executes inference requests in a non-blocking manner and returns - a Future object, call Future.result() on returned object to get the result. 
- Can also accept an int number of workers, a ThreadPoolExecutor object is - auto-initialized with the specified integer in that case; None represents - synchronous execution - if running in dynamic batch mode a default - ThreadPoolExecutor with default workers equal to the number of available - cores / 2 """ def __init__( self, - model_path: str, - engine_type: str = DEEPSPARSE_ENGINE, - batch_size: Optional[int] = 1, - num_cores: int = None, - num_streams: int = None, - scheduler: Scheduler = None, - input_shapes: List[List[int]] = None, - context: Optional[Context] = None, - executor: Optional[Union[ThreadPoolExecutor, int]] = None, - benchmark: bool = False, - _delay_engine_initialize: bool = False, # internal use only - **kwargs, + ops: Union[Dict[str, Operator], List[Operator]], + router: Router, + schedulers: List[OperatorScheduler], + continuous_batching_scheduler: Optional[ContinuousBatchingScheduler] = None, + pipeline_state: Optional[PipelineState] = None, ): - self._benchmark = benchmark - self._model_path_orig = model_path - self._model_path = model_path - self._engine_type = engine_type - self._batch_size = batch_size - self._timer_manager = TimerManager(enabled=True, multi=benchmark) - self.context = context - super().__init__(**kwargs) - - self.executor, self._num_async_workers = _initialize_executor_and_workers( - batch_size=batch_size, - workers_or_executor=executor, - ) - if self.context is not None: - num_cores = num_cores or self.context.num_cores - if self.context.num_cores != num_cores: - raise ValueError( - f"num_cores mismatch. Expected {self.context.num_cores} " - f"from passed context, but got {num_cores} while " - f"instantiating Pipeline" - ) - - self._engine_args = dict( - batch_size=self._batch_size or 1, # bs=1 for dynamic batch - num_cores=num_cores, - input_shapes=input_shapes, - ) - if engine_type.lower() == DEEPSPARSE_ENGINE: - self._engine_args["scheduler"] = scheduler - self._engine_args["num_streams"] = num_streams + self.ops = ops + self.router = router + self.schedulers = schedulers + self.pipeline_state = pipeline_state + self._continuous_batching_scheduler = continuous_batching_scheduler + self.validate() - self.onnx_file_path = self.setup_onnx_file_path() + self._scheduler_group = SchedulerGroup(self.schedulers) - if _delay_engine_initialize: - self.engine = None + def _run_next( + self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs + ): + if ( + isinstance(self.ops[next_step], EngineOperator) + and self._continuous_batching_scheduler + ): + func = self._continuous_batching_scheduler.submit + inp = self.ops[next_step].input_schema(**inp) else: - self.engine = self._initialize_engine() - self._batch_size = self._batch_size or 1 - - self.log( - identifier=f"{SystemGroups.INFERENCE_DETAILS}/num_cores_total", - value=num_cores, - category=MetricCategories.SYSTEM, + func = self._scheduler_group.submit + + return run_func( + func=func, + operator=self.ops[next_step], + inp=inp, + pipeline_state=self.pipeline_state, + inference_state=inference_state, + **kwargs, ) - def __call__(self, *args, **kwargs) -> BaseModel: - with self.timer_manager.new_timer_context() as timer: - if "engine_inputs" in kwargs: - raise ValueError( - "invalid kwarg engine_inputs. 
engine inputs determined " - f"by {self.__class__.__qualname__}.parse_inputs" - ) - - # ------ PREPROCESSING ------ - timer.start(InferenceStages.PRE_PROCESS) - # parse inputs into input_schema - pipeline_inputs = self.parse_inputs(*args, **kwargs) - self.log( - identifier="pipeline_inputs", - value=pipeline_inputs, - category=MetricCategories.DATA, + async def _run_sub_graphs( + self, + sub_graph_inputs: List[Any], + sub_graphs: List[SubGraph], + loop: Optional[asyncio.AbstractEventLoop] = None, + ) -> List[Any]: + """ + Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is + still running but has completed its current step. Schedules the next step + subgraph step. This is repeated until all subgraphs have finished running and + have reached their end step (stored in the Subgraph.end attribute). + + :param sub_graph_inputs: A list of inputs that should be passed to each + subgraph. Each subgraph is given an element of the list as input to its + first node. + :param sub_graphs: A list of Subgraph objects. Each stores the relevant + execution information for the particular subgraph, such as its current step + in the sub graph, inference state, output, and end step. + + :returns: a list of outputs for all the completed Subgraph objects. Returned + in the same order that the subgraphs were passed to the function. + """ + for i in range(len(sub_graphs)): + sub_graphs[i].output = self._run_next( + sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop ) - if not isinstance(pipeline_inputs, self.input_schema): - raise RuntimeError( - f"Unable to parse {self.__class__} inputs into a " - f"{self.input_schema} object. " - f"Inputs parsed to {type(pipeline_inputs)}" + # Execute all sub graphs until all graphs have been completed. + while any(not x.completed for x in sub_graphs): + for sub_graph in sub_graphs: + if not sub_graph.completed: + # get the result for the completed operator; resolve its output + if isinstance(sub_graph.output, asyncio.Future): + await sub_graph.output + operator_output = sub_graph.output.result() + operator_output = sub_graph.parse_output(operator_output) + + # determine the next step for the particular operator, using + # its previous output and previously stored step + next_step = self.router.next( + sub_graph.step, self.ops, operator_output + ) + # update the step + sub_graph.step = next_step + + # store the output for the next step. If the next step is + # end step, this particular route has completed. Simply + # update the output value + if next_step in sub_graph.end: + sub_graph.output = operator_output + sub_graph.completed = True + else: + sub_graph.output = self._run_next( + inp=operator_output, + inference_state=sub_graph.inf, + next_step=next_step, + loop=loop, + ) + + return [x.output for x in sub_graphs] + + async def run_async(self, *args, inference_state: InferenceState, **kwargs): + """ + Run through the operators using the provided router and scheduler. + The input to a given operator is the output of the previous operator. + + :param inference_state: inference_state for the pipeline. + :param pipeline_state: pipeline_state for the pipeline. The values in the state + are created during pipeline creation and are read-only during inference. 
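A rough usage sketch for this async entry point follows; the task name, model path, input keyword, and the InferenceState setup are illustrative assumptions that mirror the synchronous __call__ path further below rather than a documented calling convention:
```python
import asyncio

from deepsparse import Pipeline
from deepsparse.utils import InferenceState


async def main():
    # task, model_path, and the "images" keyword are placeholders
    pipeline = Pipeline.create(
        task="image_classification", model_path="model.onnx"
    )
    # run_async requires an InferenceState; set one up the same way
    # __call__ does when none is supplied
    state = InferenceState()
    state.create_state({})
    return await pipeline.run_async(images=["sample.jpg"], inference_state=state)


print(asyncio.run(main()))
```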
+ """ + loop = asyncio.get_running_loop() + + next_step = self.router.START_ROUTE + operator_output = None + + while next_step != self.router.END_ROUTE: + # Either a dictionary key or valid index + + if next_step == self.router.SPLIT_ROUTE: + if operator_output is None: + raise ValueError( + f"{self.router.SPLIT_ROUTE} should appear after " + f"{self.ROUTER.START_ROUTE}" + ) + + operator_output = await self._apply_split( + operator_output, inference_state, loop=loop ) - # batch size of the inputs may be `> self._batch_size` at this point - engine_inputs = self.process_inputs(pipeline_inputs) - if isinstance(engine_inputs, tuple): - engine_inputs, context = engine_inputs - else: - context = {} - - timer.stop(InferenceStages.PRE_PROCESS) - self.log( - identifier="engine_inputs", - value=engine_inputs, - category=MetricCategories.DATA, - ) - - # ------ INFERENCE ------ - # split inputs into batches of size `self._batch_size` - timer.start(InferenceStages.ENGINE_FORWARD) - batches, orig_batch_size = self.split_engine_inputs( - engine_inputs, self._batch_size - ) - - # submit split batches to engine threadpool - engine_forward_with_context = partial(self.engine_forward, context=context) - batch_outputs = list( - self.executor.map(engine_forward_with_context, batches) - ) - - # join together the batches of size `self._batch_size` - engine_outputs = self.join_engine_outputs( - batch_outputs, orig_batch_size, **context - ) - timer.stop(InferenceStages.ENGINE_FORWARD) - - self.log( - identifier=f"{SystemGroups.INFERENCE_DETAILS}/input_batch_size_total", - # to get the batch size of the inputs, we need to look - # to multiply the engine batch size (self._batch_size) - # by the number of batches processed by the engine during - # a single inference call - value=len(batch_outputs) * self._batch_size, - category=MetricCategories.SYSTEM, - ) - self.log( - identifier="engine_outputs", - value=engine_outputs, - category=MetricCategories.DATA, - ) - - # ------ POSTPROCESSING ------ - timer.start(InferenceStages.POST_PROCESS) - pipeline_outputs = self.process_engine_outputs(engine_outputs, **context) - if not isinstance(pipeline_outputs, (self.output_schema, Generator)): - raise ValueError( - f"Outputs of {self.__class__} must be instances of " - f"{self.output_schema} found output of type " - f"{type(pipeline_outputs)}" + next_step = self.router.route[self.router.JOIN_ROUTE] + if next_step == self.router.END_ROUTE: + return operator_output + + if next_step == self.router.START_ROUTE: + outputs = run_func( + *args, + func=self._scheduler_group.submit, + operator=self.ops[next_step], + inference_state=inference_state, + pipeline_state=self.pipeline_state, + loop=loop, + **kwargs, ) - timer.stop(InferenceStages.POST_PROCESS) - self.log( - identifier="pipeline_outputs", - value=pipeline_outputs, - category=MetricCategories.DATA, - ) - - self.log_inference_times(timer) - - return pipeline_outputs - - @classmethod - def from_config( - cls, - config: Union["PipelineConfig", str, Path], - context: Optional[Context] = None, - logger: Optional[BaseLogger] = None, - ) -> "Pipeline": - """ - :param config: PipelineConfig object, filepath to a json serialized - PipelineConfig, or raw string of a json serialized PipelineConfig - :param context: Optional Context object to use for creating instances of - MultiModelEngine. 
The Context contains a shared scheduler along with - other runtime information that will be used across instances of the - MultiModelEngine to provide optimal performance when running - multiple models concurrently - :param logger: An optional DeepSparse Logger object for inference - logging. Default is None - :return: loaded Pipeline object from the config - """ - if isinstance(config, Path) or ( - isinstance(config, str) and os.path.exists(config) - ): - if isinstance(config, str): - config = Path(config) - config = PipelineConfig.parse_file(config) - if isinstance(config, str): - config = PipelineConfig.parse_raw(config) - - return cls.create( - task=config.task, - model_path=config.model_path, - engine_type=config.engine_type, - batch_size=config.batch_size, - num_cores=config.num_cores, - scheduler=config.scheduler, - input_shapes=config.input_shapes, - alias=config.alias, - context=context, - logger=logger, - **config.kwargs, - ) + await outputs + operator_output = outputs.result() - @abstractmethod - def setup_onnx_file_path(self) -> str: - """ - Performs any setup to unwrap and process the given `model_path` and other - class properties into an inference ready onnx file to be compiled by the - engine of the pipeline + else: + outputs = self._run_next( + inp=operator_output, + next_step=next_step, + inference_state=inference_state, + loop=loop, + ) + await outputs + operator_output = outputs.result() - :return: file path to the ONNX file for the engine to compile - """ - raise NotImplementedError() + if isinstance(operator_output, tuple): + state_update = operator_output[-1] + operator_output = operator_output[0] - @abstractmethod - def process_inputs( - self, - inputs: BaseModel, - ) -> Union[List[numpy.ndarray], Tuple[List[numpy.ndarray], Dict[str, Any]]]: - """ - :param inputs: inputs to the pipeline. Must be the type of the `input_schema` - of this pipeline - :return: inputs of this model processed into a list of numpy arrays that - can be directly passed into the forward pass of the pipeline engine. 
Can - also include a tuple with engine inputs and special key word arguments - to pass to process_engine_outputs to facilitate information from the raw - inputs to postprocessing that may not be included in the engine inputs - """ - raise NotImplementedError() + next_step = self.router.next(next_step, self.ops, operator_output) + if state_update: + inference_state.update_state(state_update) + return operator_output - @abstractmethod - def process_engine_outputs( + async def _apply_split( self, - engine_outputs: List[numpy.ndarray], - **kwargs, - ) -> BaseModel: - """ - :param engine_outputs: list of numpy arrays that are the output of the engine - forward pass - :return: outputs of engine post-processed into an object in the `output_schema` - format of this pipeline - """ - raise NotImplementedError() - - @property - def model_path_orig(self) -> str: - """ - :return: value originally passed to the `model_path` argument to initialize - this Pipeline - """ - return self._model_path_orig - - @property - def model_path(self) -> str: - """ - :return: path on local system to the onnx file of this model or directory - containing a model.onnx file along with supporting files - """ - return self._model_path - - @property - def engine_args(self) -> Dict[str, Any]: - """ - :return: arguments besides onnx filepath used to instantiate engine - """ - return self._engine_args - - @property - def engine_type(self) -> str: - """ - :return: type of inference engine used for model forward pass - """ - return self._engine_type - - @property - def timer_manager(self) -> TimerManager: - return self._timer_manager - - @property - def current_timer(self) -> Optional[StagedTimer]: - """ - :return: current timer for the pipeline, if any - """ - timer = self.timer_manager.current - - if timer is None: - timer = self.timer_manager.latest - - return timer - - @property - def benchmark(self) -> bool: - return self._benchmark - - @benchmark.setter - def benchmark(self, value: bool): - self._benchmark = value - self.timer_manager.multi = value - - def to_config(self) -> "PipelineConfig": - """ - :return: PipelineConfig that can be used to reload this object - """ - - if not hasattr(self, "task"): - raise RuntimeError( - f"{self.__class__} instance has no attribute task. Pipeline objects " - "must have a task to be serialized to a config. Pipeline objects " - "must be declared with the Pipeline.register object to be assigned a " - "task" + inp: Any, + inference_state: InferenceState, + loop: Optional[asyncio.AbstractEventLoop] = None, + ): + batches, orig_batch_size = self.expand_inputs(inp, 1) + + # Create a list of SplitRoutes, per batch size 1 + # Each SplitRoute object holds information about the particular path it + # follows. All start at the same step defined by SPLIT_ROUTE and start + # with the same inference_state. 
+ split_graphs = [ + SubGraph( + inf=copy.deepcopy(inference_state), + step=self.router.route[self.router.SPLIT_ROUTE], + end=[self.router.JOIN_ROUTE], ) + for i in range(len(batches)) + ] - # parse any additional properties as kwargs - kwargs = {} - for attr_name, attr in self.__class__.__dict__.items(): - if isinstance(attr, property) and attr_name not in dir(PipelineConfig): - kwargs[attr_name] = getattr(self, attr_name) - - return PipelineConfig( - task=self.task, - model_path=self.model_path_orig, - engine_type=self.engine_type, - batch_size=self._batch_size, - num_cores=self._engine_args.get("num_cores"), - scheduler=self._engine_args.get("scheduler"), - input_shapes=self._engine_args.get("input_shapes"), - alias=self.alias, - kwargs=kwargs, + outputs = await self._run_sub_graphs( + sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop ) + return self.condense_inputs(outputs) - def join_engine_outputs( - self, batch_outputs: List[List[numpy.ndarray]], orig_batch_size: int, **kwargs - ) -> List[numpy.ndarray]: - """ - Joins list of engine outputs together into one list. - This is the opposite of `split_engine_inputs` and is meant to be used in tandem. - - :param batch_outputs: list of engine outputs - :param orig_batch_size: original batch size of the inputs - :return: list of engine outputs joined together - """ - return join_engine_outputs(batch_outputs, orig_batch_size) - - def split_engine_inputs( - self, items: List[numpy.ndarray], batch_size: int - ) -> List[List[numpy.ndarray]]: - """ - Splits each item into numpy arrays with the first dimension == `batch_size`. - This is the opposite of `join_engine_outputs` and is meant to be used in tandem. - - :param items: size of each batch to split into - :param batch_size: size of each batch to enforce - - :return: list of batches, where each batch is a list of numpy arrays - """ - return split_engine_inputs(items, batch_size) - - def engine_forward( - self, - engine_inputs: List[numpy.ndarray], - context: Dict = {}, - ) -> List[numpy.ndarray]: + @classmethod + def create(cls, task: str, **kwargs) -> "Pipeline": """ - :param engine_inputs: list of numpy inputs to Pipeline engine forward - pass - :param context: optional dictionary to be used during engine execution - :return: result of forward pass to Pipeline engine + :param task: Pipeline task + :param kwargs: extra task specific kwargs to be passed to the Pipeline + :return: pipeline object initialized for the given task """ - return self.engine(engine_inputs) + try: + pipeline = Operator.create(task=task, **kwargs) + if not isinstance(pipeline, cls): + raise RuntimeError( + "Pipeline was not created for the given task. 
The " + "provided task should be registered using the OperatorRegistry" + ) + except Exception: + from deepsparse.legacy import Pipeline - def log_inference_times(self, timer: StagedTimer): - """ - logs stage times in the given timer + pipeline = Pipeline.create(task=task, **kwargs) + return pipeline - :param timer: timer to log - """ - for stage, time in timer.times.items(): - self.log( - identifier=f"{SystemGroups.PREDICTION_LATENCY}/{stage}_seconds", - value=time, - category=MetricCategories.SYSTEM, - ) - - def _initialize_engine( + def run( self, - ) -> Union[Engine, MultiModelEngine, ORTEngine, TorchScriptEngine]: - return create_engine( - self.onnx_file_path, self.engine_type, self._engine_args, self.context - ) - - def _properties_dict(self) -> Dict: - return { - "config": self.to_config(), - "engine": self.engine, - } - - def __repr__(self): - """ - :return: Unambiguous representation of the current pipeline + *args, + inference_state: InferenceState, + **kwargs, + ): """ - return "{}({})".format(self.__class__, self._properties_dict()) + Run through the operators using the provided router and scheduler. + The input to a given operator is the output of the previous operator. - def __str__(self): + :param inference_state: inference_state for the pipeline. + :param pipeline_state: pipeline_state for the pipeline. The values in the state + are created during pipeline creation and are read-only during inference. """ - :return: Human readable form of the current pipeline - """ - formatted_props = [ - "\t{}: {}".format(key, val) for key, val in self._properties_dict().items() - ] - - return "{}.{}:\n{}".format( - self.__class__.__module__, - self.__class__.__qualname__, - "\n".join(formatted_props), - ) + next_step = self.router.START_ROUTE + operator_output = None + while next_step != self.router.END_ROUTE: + # Split Grap Execution (i.e multiple subgraphs) + # NOTE: split_route should only appear after the start route node + if next_step == self.router.SPLIT_ROUTE: + if operator_output is None: + raise ValueError( + f"{self.router.SPLIT_ROUTE} should appear after " + f"{self.router.START_ROUTE}" + ) -class PipelineConfig(BaseModel): - """ - Configuration for creating a Pipeline object - - Can be used to create a Pipeline from a config object or file with - Pipeline.from_config(), or used as a building block for other configs - such as for deepsparse.server - """ - - task: str = Field( - description="name of task to create a pipeline for", - ) - model_path: str = Field( - default=None, - description="path on local system or SparseZoo stub to load the model from", - ) - engine_type: str = Field( - default=DEEPSPARSE_ENGINE, - description=( - "inference engine to use. Currently supported values include " - "'deepsparse' and 'onnxruntime'. Default is 'deepsparse'" - ), - ) - batch_size: Optional[int] = Field( - default=1, - description=("static batch size to use for inference. Default is 1"), - ) - num_cores: int = Field( - default=None, - description=( - "number of CPU cores to allocate for inference engine. None" - "specifies all available cores. Default is None" - ), - ) - scheduler: Optional[str] = Field( - default="async", - description=( - "(deepsparse only) kind of scheduler to execute with. Defaults to async" - ), - ) - input_shapes: List[List[int]] = Field( - default=None, - description=( - "list of shapes to set ONNX the inputs to. Pass None to use model as-is. 
" - "Default is None" - ), - ) - alias: str = Field( - default=None, - description=( - "optional name to give this pipeline instance, useful when inferencing " - "with multiple models. Default is None" - ), - ) - kwargs: Dict[str, Any] = Field( - default={}, - description=( - "Additional arguments for inference with the model that will be passed " - "into the pipeline as kwargs" - ), - ) + operator_output = asyncio.run( + self._apply_split(operator_output, inference_state) + ) + next_step = self.router.route[self.router.JOIN_ROUTE] + if next_step == self.router.END_ROUTE: + return operator_output + + if next_step == self.router.START_ROUTE: + operator_output = run_func( + *args, + func=self._scheduler_group.submit, + operator=self.ops[next_step], + inference_state=inference_state, + pipeline_state=self.pipeline_state, + **kwargs, + ).result() + + if isinstance(operator_output, tuple): + operator_output, state_update = ( + operator_output[0], + operator_output[-1], + ) + inference_state.update_state(state_update) + + next_step = self.router.next(next_step, self.ops, operator_output) + else: + # Single graph execution + graph = SubGraph( + inf=copy.deepcopy(inference_state), + step=next_step, + end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], + ) -class BucketingPipeline(object): - """ - A Proxy class that adds Bucketing functionality to Pipelines + operator_output = asyncio.run( + self._run_sub_graphs( + sub_graph_inputs=[operator_output], sub_graphs=[graph] + ) + )[0] - :param pipelines: A list of Pipeline objects/buckets that implement - `Bucketable` contract - """ + inference_state = graph.inf + next_step = graph.step - def __init__(self, pipelines: List[Pipeline]): - if not (pipelines and isinstance(pipelines, list)): - raise ValueError( - "Expected a non empty List of pipeline objects but got " f"{pipelines}" - ) - self._pipelines = pipelines - self._pipeline_class = pipelines[0].__class__ - self._validate_pipeline_class() + return operator_output def __call__(self, *args, **kwargs): - bucket, parsed_inputs = self._choose_bucket(*args, **kwargs) - return bucket(parsed_inputs) - - def _choose_bucket(self, *args, **kwargs): - parsed_inputs = self._pipelines[-1].parse_inputs(*args, **kwargs) - bucket = self._pipeline_class.route_input_to_bucket( - input_schema=parsed_inputs, - pipelines=self._pipelines, - ) - return bucket, parsed_inputs - - def __getattr__(self, item): - value = getattr(self._pipelines[0].__class__, item) - - if isinstance(value, property): - return getattr(self._pipelines[0], item) - - raise AttributeError( - f"{item} not found in {self.__class__.__name__}, " - f"and is not a property of {self._pipeline_class.__name__}" - ) - - @property - def input_schema(self) -> Type[BaseModel]: """ - :return: pydantic model class that inputs to this pipeline must comply to - """ - return self._pipelines[0].input_schema + Consolidate any provided inference_state or pipeline_state objects and pass + any other operator inputs to run(). 
- @property - def output_schema(self) -> Type[BaseModel]: - """ - :return: pydantic model class that outputs of this pipeline must comply to + :return: output of the pipeline operators run with the router for the given + input """ - return self._pipelines[0].output_schema - - def _validate_pipeline_class(self): - # validate all pipelines belong to the same class - - if not issubclass(self._pipeline_class, Bucketable): - raise ValueError(f"{self._pipeline_class} is not Bucketable") - - is_valid = all( - isinstance(pipeline, self._pipeline_class) for pipeline in self._pipelines - ) - - if not is_valid: - raise ValueError( - "All Pipeline Buckets must belong to the same Pipeline Class" - ) + if kwargs.get("inference_state"): + inference_state = kwargs.pop("inference_state") + else: + inference_state = InferenceState() + inference_state.create_state({}) + kwargs["inference_state"] = inference_state -class Bucketable(ABC): - """ - A contract, that ensures implementing Pipeline class can create multiple Pipeline - instances and route each input sample to correct instance based off of specific - implementations of abstract methods defined in this contract - """ + return self.run(*args, **kwargs) - @staticmethod - @abstractmethod - def should_bucket(*args, **kwargs) -> bool: + def expand_inputs(self, *args, **kwargs): """ - :returns: True if buckets should be created else False + Generic function to handle expanding values. """ - pass + raise NotImplementedError( + "This function should be implemented for any router with split or join " + "nodes. expand_inputs will be called prior to the split node (stored in " + "the router's SPLIT_ROUTE attribute), expanding outputs for each output " + "such that there is a batch size of one per thread." + ) - @staticmethod - @abstractmethod - def create_pipeline_buckets(*args, **kwargs) -> List[Pipeline]: + def condense_inputs(self, *args, **kwargs): """ - :return: Create and return a list of Pipeline objects - representing different buckets + Generic function to handle condensing values. """ - pass + raise NotImplementedError( + "This function should be implemented for any router with split or join " + "nodes. condense_inputs will be called after the join node (stored in the " + "router's JOIN_ROUTE attribute), condensing outputs from multiple threads." + ) - @staticmethod - @abstractmethod - def route_input_to_bucket( - *args, input_schema: BaseModel, pipelines: List[Pipeline], **kwargs - ) -> Pipeline: + def validate(self): """ - :param input_schema: The schema representing an input to the pipeline - :param pipelines: Different buckets to be used - :return: The correct Pipeline object (or Bucket) to route input to + Validate the compatibility of the router and operators provided. """ - pass - - -def create_engine( - onnx_file_path: str, - engine_type: str, - engine_args: Dict, - context: Optional[Context] = None, -) -> Union[Engine, MultiModelEngine, ORTEngine]: - """ - Create an inference engine for a given ONNX model - - :param onnx_file_path: path to ONNX model file - :param engine_type: type of engine to create.
- :param engine_args: arguments to pass to engine constructor - :param context: context to use for engine - :return: inference engine - """ - engine_type = engine_type.lower() - - if engine_type == DEEPSPARSE_ENGINE: - if context is not None and isinstance(context, Context): - engine_args.pop("num_cores", None) - engine_args.pop("scheduler", None) - engine_args.pop("num_streams", None) - engine_args["context"] = context - return MultiModelEngine( - model=onnx_file_path, - **engine_args, - ) - engine_args.pop("cache_output_bools", None) - return Engine(onnx_file_path, **engine_args) - - if engine_type == ORT_ENGINE: - return ORTEngine(onnx_file_path, **engine_args) - - if engine_type == TORCHSCRIPT_ENGINE: - return TorchScriptEngine(onnx_file_path, **engine_args) - - raise ValueError( - f"Unknown engine_type {engine_type}. Supported values include: " - f"{SUPPORTED_PIPELINE_ENGINES}" - ) - - -def _initialize_executor_and_workers( - batch_size: Optional[int], - workers_or_executor: Optional[Union[int, ThreadPoolExecutor]], -) -> Tuple[Optional[ThreadPoolExecutor], int]: - if isinstance(workers_or_executor, ThreadPoolExecutor): - num_async_workers = workers_or_executor._max_workers # noqa - executor = workers_or_executor - elif isinstance(workers_or_executor, int): - num_async_workers = max(1, workers_or_executor) - executor = ThreadPoolExecutor(max_workers=num_async_workers) - elif batch_size is None and workers_or_executor is None: - # default num workers to num available cores / 2 - num_cpu_cores_avaailable = cpu_details()[0] - num_async_workers = max(1, num_cpu_cores_avaailable // 2) - executor = ThreadPoolExecutor(max_workers=num_async_workers) - elif workers_or_executor is not None: - raise ValueError( - "Expected an int or ThreadPoolExecutor to run in async mode" - f" but got {workers_or_executor} of type {type(workers_or_executor)}" - ) - else: - executor = ThreadPoolExecutor(max_workers=1) - num_async_workers = 1 - - if batch_size is None and executor is None: - raise ValueError( - "Must have an ThreadPoolExecutor for running in dynamic batch mode " - f"but got {None}" - ) - - return executor, num_async_workers - - -def text_generation_pipeline( - *args, model: Optional[str] = None, **kwargs -) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("text_generation", *args, **kwargs) - - -def code_generation_pipeline( - *args, model: Optional[str] = None, **kwargs -) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("code_generation", *args, **kwargs) - - -def chat_pipeline(*args, model: Optional[str] = None, **kwargs) -> "Pipeline": - """ - :return: text generation pipeline with the given args and - kwargs passed to Pipeline.create - """ - kwargs = _parse_model_arg(model, **kwargs) - return Pipeline.create("chat", *args, **kwargs) - - -def _parse_model_arg(model: Optional[str], **kwargs) -> dict: - if model is not None: - model_path = kwargs.get("model_path") - if model_path is not None: - raise ValueError( - f"Only one of model and model_path may be supplied, found {model} " - f"and {model_path} respectively" - ) - kwargs["model_path"] = model - return kwargs - - -# aliases for top level import -TextGeneration = text_generation_pipeline -CodeGeneration = code_generation_pipeline -Chat = chat_pipeline - - 
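The convenience constructors removed above (text_generation_pipeline, code_generation_pipeline, chat_pipeline and their TextGeneration/CodeGeneration/Chat aliases) remain available from deepsparse.legacy; after this refactor the same flow goes through the new Pipeline.create classmethod added earlier in the diff. A minimal sketch of that flow follows — the model directory and the call kwarg are illustrative placeholders, not values taken from this change:

```python
from deepsparse import Pipeline

# Pipeline.create first tries to build the task through the new OperatorRegistry;
# tasks not registered there fall back to deepsparse.legacy.Pipeline.create
# (see the `create` classmethod earlier in this diff).
pipe = Pipeline.create(
    task="text_generation",
    model_path="text_generation_model_dir/",  # placeholder path
)

# kwargs are forwarded through __call__ -> run(); `prompt` is shown for
# illustration and may differ from the pipeline's actual input schema.
generation = pipe(prompt="def fibonacci(n):")
```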
-def question_answering_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers question_answering pipeline - - example instantiation: - ```python - question_answering = Pipeline.create( - task="question_answering", - model_path="question_answering_model_dir/", - ) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param doc_stride: if the context is too long to fit with the question for the - model, it will be split in several chunks with some overlap. This argument - controls the size of that overlap. Currently, only reading the first span - is supported (everything after doc_stride will be truncated). Default - is 128 - :param max_question_len: maximum length of the question after tokenization. - It will be truncated if needed. Default is 64 - :param max_answer_len: maximum length of answer after decoding. Default is 15 - """ - return Pipeline.create("question_answering", *args, **kwargs) - - -def text_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers text classification pipeline - - example instantiation: - ```python - text_classifier = Pipeline.create( - task="text_classification", - model_path="text_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - example batch size 1, single text inputs (ie sentiment analysis): - ```python - sentiment = text_classifier("the food tastes great") - sentiment = text_classifier(["the food tastes great"]) - sentiment = text_classifier([["the food tastes great"]]) - ``` - - example batch size 1, multi text input (ie QQP like tasks): - ```python - prediction = text_classifier([["how is the food?", "what is the food?"]]) - ``` - - example batch size n, single text inputs: - ```python - sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) - sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. 
- Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param return_all_scores: if True, instead of returning the prediction as the - argmax of model class predictions, will return all scores and labels as - a list for each result in the batch. Default is False - """ - return Pipeline.create("text_classification", *args, **kwargs) - - -def sentiment_analysis_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers text classification pipeline - - example instantiation: - ```python - text_classifier = Pipeline.create( - task="text_classification", - model_path="text_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - example batch size 1, single text inputs (ie sentiment analysis): - ```python - sentiment = text_classifier("the food tastes great") - sentiment = text_classifier(["the food tastes great"]) - sentiment = text_classifier([["the food tastes great"]]) - ``` - - example batch size 1, multi text input (ie QQP like tasks): - ```python - prediction = text_classifier([["how is the food?", "what is the food?"]]) - ``` - - example batch size n, single text inputs: - ```python - sentiments = text_classifier(["the food tastes great", "the food tastes bad"]) - sentiments = text_classifier([["the food tastes great"], ["the food tastes bad"]]) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param return_all_scores: if True, instead of returning the prediction as the - argmax of model class predictions, will return all scores and labels as - a list for each result in the batch. 
Default is False - """ - return Pipeline.create("text_classification", *args, **kwargs) - - -def token_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - transformers token classification pipeline - - example instantiation: - ```python - token_classifier = Pipeline.create( - task="token_classification", - model_path="token_classification_model_dir/", - batch_size=BATCH_SIZE, - ) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param aggregation_strategy: how to aggregate tokens in postprocessing. Options - include 'none', 'simple', 'first', 'average', and 'max'. Default is None - :param ignore_labels: list of label names to ignore in output. Default is - ['0'] which ignores the default known class label - """ - return Pipeline.create("token_classification", *args, **kwargs) - - -def image_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - Image classification pipeline for DeepSparse - - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional dict, or json file of class names to use for - mapping class ids to class labels. Default is None - """ - return Pipeline.create("image_classification", *args, **kwargs) - - -def yolo_pipeline(*args, **kwargs) -> "Pipeline": - """ - Image Segmentation YOLO pipeline for DeepSparse + router_validation = self.router.validate(self.ops) - :param model_path: path on local system or SparseZoo stub to load the model from - :param engine_type: inference engine to use. Currently supported values - include 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. 
Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param class_names: Optional string identifier, dict, or json file of - class names to use for mapping class ids to class labels. Default is - `coco` - """ - return Pipeline.create("yolo", *args, **kwargs) - - -def haystack_pipeline(*args, **kwargs) -> "Pipeline": - """ - Neural Magic pipeline for running Haystack DocumentSearchPipeline. - Supports selected Haystack Nodes as well as Haystack nodes integrated - with the Neural Magic DeepSparse Engine - - example embedding model instantiation: - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - model_path="masked_language_modeling_model_dir/", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseEmbeddingRetriever", - "retriever_args": { - "extraction_strategy": "reduce_mean" - } - }, - ) - ``` - - example deepsparse biencoder instantiation - ```python - haystack_pipeline = Pipeline.create( - task="information_retrieval_haystack", - config={ - "document_store": "InMemoryDocumentStore", - "document_store_args": { - "similarity": "cosine", - "use_gpu": False, - }, - "retriever": "DeepSparseDensePassageRetriever", - "retriever_args": { - "query_model_path": "./query_model", - "passage_model_path": "./passage_model" - } - }, - ) - ``` - - writing documents: - ```python - haystack_pipeline.write_documents([ - { - "title": "Claude Shannon", - "content": "Claude Elwood Shannon was an American mathematician, " - "electrical engineer, and cryptographer known as a father of " - "information theory. He was a 21-year-old master's degree student at " - "the Massachusetts Institute of Technology (MIT)." - }, - { - "title": "Vincent van Gogh", - "content": "Van Gogh was born into an upper-middle-class family. " - "As a child he was serious, quiet and thoughtful. He began drawing " - "at an early age and as a young man worked as an art dealer." - }, - { - "title": "Stevie Wonder", - "content": "Stevland Hardaway Morris, known professionally as " - "Stevie Wonder, is an American singer and musician, who is " - "credited as a pioneer and influence by musicians across a range " - "of genres." - } - ]) - ``` - - example queries: - ```python - from deepsparse.transformers.haystack import print_pipeline_documents - pipeline_outputs = haystack_pipeline( - queries="who invented information theory", - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - - pipeline_outputs = haystack_pipeline( - queries=[ - "famous artists", - "What is Stevie Wonder's real name?" - ], - params={"Retriever": {"top_k": 4}} - ) - print_pipeline_documents(pipeline_outputs) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. 
Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param docs: list of documents to be written to document_store. Can also - be written after instantiation with write_documents method. - Default is None - :param config: dictionary or instance of HaystackPipelineConfig. Used to - specify Haystack node arguments - :param retriever_kwargs: keyword arguments to be passed to retriever. If - the retriever is a deepsparse retriever, then these arguments will also - be passed to the TransformersEmbeddingExtractionPipeline of the retriever - """ - return Pipeline.create("information_retrieval_haystack", *args, **kwargs) - - -def embedding_extraction_pipeline(*args, **kwargs) -> "Pipeline": - """ - embedding extraction pipeline for extracting intermediate layer embeddings - from transformer models - - example instantiation: - ```python - embedding_extraction_pipeline = Pipeline.create( - task="embedding_extraction", - model_path="masked_language_modeling_model_dir/", - ) - results = embedding_extraction_pipeline( - [ - "the warriors have won the nba finals" - "the warriors are the greatest basketball team ever" - ] - ) - emb_1, emb_2 = results.embeddings - # (expect emb_1 and emb_2 to have high cosine similiarity) - ``` - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: static batch size to use for inference. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. - If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param emb_extraction_layer: if an int, the transformer layer number from - which the embeddings will be extracted. If a string, the name of last - ONNX node in model to draw embeddings from. If None, leave the model - unchanged. Default is -1 (last transformer layer before prediction head) - :param model_size: size of transformer model (size of hidden layer per token - if the model is cut). Default is 768 - :param extraction_strategy: method of pooling embedding values. Currently - supported values are 'per_token', 'reduce_mean', 'reduce_max' and 'cls_token'. 
- Default is 'per_token' - :param return_numpy: return embeddings a list of numpy arrays, list of lists - of floats otherwise. Default is True - :param context: context for engine. If None, then the engine will be initialized - with 2 streams to make use of parallel inference of labels. Default is None - """ - return Pipeline.create("embedding_extraction", *args, **kwargs) - - -def zero_shot_text_classification_pipeline(*args, **kwargs) -> "Pipeline": - """ - Transformers zero shot text classification pipeline. This pipeline allows for - text classification using models which were trained on datasets not originally - meant for this task. - - This class upon construction returns an instance of a child Pipeline which - inherits from ZeroShotTextClassificationPipelineBase. Which type of Pipeline - is returned depends on the value of the passed model_scheme argument. - - example dynamic labels: - ```python - zero_shot_text_classifier = Pipeline.create( - task="zero_shot_text_classification", - model_scheme="mnli", - model_config={"hypothesis_template": "This text is related to {}"}, - model_path="mnli_model_dir/", - ) - - sequence_to_classify = "Who are you voting for in 2020?" - candidate_labels = ["Europe", "public health", "politics"] - zero_shot_text_classifier(sequences=sequence_to_classify, labels=candidate_labels) - >>> ZeroShotTextClassificationOutput( - sequences='Who are you voting for in 2020?', - labels=['politics', 'public health', 'Europe'], - scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) - ``` - - example static labels: - ```python - zero_shot_text_classifier = Pipeline.create( - task="zero_shot_text_classification", - model_scheme="mnli", - model_config={"hypothesis_template": "This text is related to {}"}, - model_path="mnli_model_dir/", - labels=["politics", "Europe", "public health"] - ) - - sequence_to_classify = "Who are you voting for in 2020?" - zero_shot_text_classifier(sequences=sequence_to_classify) - >>> ZeroShotTextClassificationOutput( - sequences='Who are you voting for in 2020?', - labels=['politics', 'public health', 'Europe'], - scores=[0.9073666334152222, 0.046810582280159, 0.04582275450229645]) - ``` - - Note that labels must either be provided during pipeline instantiation via - the constructor, at inference time, but not both. - - Note that if a hypothesis_template is provided at inference time, then it - will override the value provided during model instantiation - - :param model_path: sparsezoo stub to a transformers model or (preferred) a - directory containing a model.onnx, tokenizer config, and model config - :param engine_type: inference engine to use. Currently supported values include - 'deepsparse' and 'onnxruntime'. Default is 'deepsparse' - :param batch_size: batch size must divide sequences * labels, regardless of - whether using dynamic or static labels. Default is 1 - :param num_cores: number of CPU cores to allocate for inference engine. None - specifies all available cores. Default is None - :param scheduler: (deepsparse only) kind of scheduler to execute with. - Pass None for the default - :param input_shapes: list of shapes to set ONNX the inputs to. Pass None - to use model as-is. Default is None - :param alias: optional name to give this pipeline instance, useful when - inferencing with multiple models. Default is None - :param sequence_length: sequence length to compile model and tokenizer for. 
- If a list of lengths is provided, then for each length, a model and - tokenizer will be compiled capable of handling that sequence length - (also known as a bucket). Default is 128 - :param default_model_name: huggingface transformers model name to use to - load a tokenizer and model config when none are provided in the `model_path`. - Default is "bert-base-uncased" - :param model_scheme: training scheme used to train the model used for zero shot. - Default is "mnli" - :param model_config: config object specific to the model_scheme of this model - or a dict of config keyword arguments - :param labels: static list of labels to perform text classification with. Can - also be provided at inference time - :param context: context for engine. If None, then the engine will be initialized - with 2 streams to make use of parallel inference of labels - """ - return Pipeline.create("zero_shot_text_classification", *args, **kwargs) + if router_validation is False: + # default error message + op_types = [type(op) for op in self.ops] + raise ValueError(f"Invalid Router: {type(self.router)} for ops: {op_types}") + elif isinstance(router_validation, str): + raise ValueError(f"Invalid Router for operators: {router_validation}") diff --git a/src/deepsparse/pipelines/custom_pipeline.py b/src/deepsparse/pipelines/custom_pipeline.py index 5fe6b014a8..a10ab32ea1 100644 --- a/src/deepsparse/pipelines/custom_pipeline.py +++ b/src/deepsparse/pipelines/custom_pipeline.py @@ -18,7 +18,7 @@ import numpy from pydantic import BaseModel -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils.onnx import model_to_path diff --git a/src/deepsparse/pipelines/embedding_extraction.py b/src/deepsparse/pipelines/embedding_extraction.py index e812b3e9a9..801c242afd 100644 --- a/src/deepsparse/pipelines/embedding_extraction.py +++ b/src/deepsparse/pipelines/embedding_extraction.py @@ -23,7 +23,7 @@ import numpy from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.log import get_main_logger from deepsparse.utils import truncate_onnx_embedding_model diff --git a/src/deepsparse/v2/routers/__init__.py b/src/deepsparse/routers/__init__.py similarity index 100% rename from src/deepsparse/v2/routers/__init__.py rename to src/deepsparse/routers/__init__.py diff --git a/src/deepsparse/v2/routers/router.py b/src/deepsparse/routers/router.py similarity index 97% rename from src/deepsparse/v2/routers/router.py rename to src/deepsparse/routers/router.py index 6740f706f1..08e2fe5aa9 100644 --- a/src/deepsparse/v2/routers/router.py +++ b/src/deepsparse/routers/router.py @@ -17,7 +17,7 @@ from abc import abstractmethod from typing import Any, Dict, List, Optional, Union -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator _LOGGER = logging.getLogger(__name__) @@ -83,7 +83,7 @@ class LinearRouter(Router): def __init__(self, end_route: int, start_route: int = 0): super().__init__(end_route=end_route, start_route=start_route) - _LOGGER.warn("SPLIT and JOIN are not yet supported for the LinearRouter.") + _LOGGER.warning("SPLIT and JOIN are not yet supported for the LinearRouter.") def next( self, past: int, ops: Optional[List[Operator]] = None, inp: Optional[Any] = None diff --git a/src/deepsparse/v2/schedulers/__init__.py b/src/deepsparse/schedulers/__init__.py similarity index 100% rename from src/deepsparse/v2/schedulers/__init__.py rename to src/deepsparse/schedulers/__init__.py 
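The __call__ body added above either reuses an inference_state supplied by the caller or constructs a fresh InferenceState before delegating to run(). A small sketch of both call forms, assuming an already-registered task — the task name, model path, and input kwarg are placeholders rather than values from this diff:

```python
from deepsparse import Pipeline
from deepsparse.utils import InferenceState  # relocated from deepsparse.v2.utils in this refactor

pipe = Pipeline.create(
    task="image_classification",
    model_path="image_classification_model_dir/",  # placeholder path
)

# 1) Default form: __call__ builds an InferenceState internally.
result = pipe(images=["sample.jpg"])  # `images` kwarg is illustrative

# 2) Explicit form, mirroring the __call__ hunk above.
state = InferenceState()
state.create_state({})
result = pipe(images=["sample.jpg"], inference_state=state)
```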
diff --git a/src/deepsparse/v2/schedulers/continuous_batching_scheduler.py b/src/deepsparse/schedulers/continuous_batching_scheduler.py similarity index 97% rename from src/deepsparse/v2/schedulers/continuous_batching_scheduler.py rename to src/deepsparse/schedulers/continuous_batching_scheduler.py index cc74ac0996..03bcda019f 100644 --- a/src/deepsparse/v2/schedulers/continuous_batching_scheduler.py +++ b/src/deepsparse/schedulers/continuous_batching_scheduler.py @@ -17,9 +17,9 @@ from threading import Lock from typing import List -from deepsparse.v2.operators import EngineOperator, Operator -from deepsparse.v2.schedulers.scheduler import OperatorScheduler -from deepsparse.v2.schedulers.utils import ( +from deepsparse.operators import EngineOperator, Operator +from deepsparse.schedulers.scheduler import OperatorScheduler +from deepsparse.schedulers.utils import ( ContinuousBatchingExecutorThread, ContinuousBatchingQueues, ) diff --git a/src/deepsparse/v2/schedulers/scheduler.py b/src/deepsparse/schedulers/scheduler.py similarity index 98% rename from src/deepsparse/v2/schedulers/scheduler.py rename to src/deepsparse/schedulers/scheduler.py index 37f2cfce90..6e89d334dc 100644 --- a/src/deepsparse/v2/schedulers/scheduler.py +++ b/src/deepsparse/schedulers/scheduler.py @@ -17,7 +17,7 @@ from concurrent.futures import Future, ThreadPoolExecutor from typing import Callable, Optional -from deepsparse.v2.operators import Operator +from deepsparse.operators import Operator __all__ = ["OperatorScheduler"] diff --git a/src/deepsparse/v2/schedulers/scheduler_group.py b/src/deepsparse/schedulers/scheduler_group.py similarity index 94% rename from src/deepsparse/v2/schedulers/scheduler_group.py rename to src/deepsparse/schedulers/scheduler_group.py index 201fcee150..01e590435d 100644 --- a/src/deepsparse/v2/schedulers/scheduler_group.py +++ b/src/deepsparse/schedulers/scheduler_group.py @@ -16,8 +16,8 @@ from concurrent.futures import Future from typing import Any, List -from deepsparse.v2.operators import Operator -from deepsparse.v2.schedulers.scheduler import OperatorScheduler +from deepsparse.operators import Operator +from deepsparse.schedulers.scheduler import OperatorScheduler __all__ = ["SchedulerGroup"] diff --git a/src/deepsparse/v2/schedulers/utils/__init__.py b/src/deepsparse/schedulers/utils/__init__.py similarity index 100% rename from src/deepsparse/v2/schedulers/utils/__init__.py rename to src/deepsparse/schedulers/utils/__init__.py diff --git a/src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py b/src/deepsparse/schedulers/utils/continuous_batching_executor.py similarity index 95% rename from src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py rename to src/deepsparse/schedulers/utils/continuous_batching_executor.py index 40ff00ca4f..7a2fdba123 100644 --- a/src/deepsparse/v2/schedulers/utils/continuous_batching_executor.py +++ b/src/deepsparse/schedulers/utils/continuous_batching_executor.py @@ -16,8 +16,8 @@ from typing import Dict from deepsparse import Engine -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers.utils.continuous_batching_queues import ( +from deepsparse.operators import EngineOperator +from deepsparse.schedulers.utils.continuous_batching_queues import ( ContinuousBatchingQueues, ) diff --git a/src/deepsparse/v2/schedulers/utils/continuous_batching_queues.py b/src/deepsparse/schedulers/utils/continuous_batching_queues.py similarity index 100% rename from 
src/deepsparse/v2/schedulers/utils/continuous_batching_queues.py rename to src/deepsparse/schedulers/utils/continuous_batching_queues.py diff --git a/src/deepsparse/server/cli.py b/src/deepsparse/server/cli.py index c51ba2f972..d92ef1a3bb 100644 --- a/src/deepsparse/server/cli.py +++ b/src/deepsparse/server/cli.py @@ -27,7 +27,8 @@ import click import yaml -from deepsparse.pipeline import SupportedTasks +# TODO: update to use new tasks once server support lands +from deepsparse.legacy.tasks import SupportedTasks from deepsparse.server.config import EndpointConfig, ServerConfig from deepsparse.server.deepsparse_server import DeepsparseServer from deepsparse.server.openai_server import OpenAIServer diff --git a/src/deepsparse/server/config.py b/src/deepsparse/server/config.py index 8a8f01a0a8..aafb42e59c 100644 --- a/src/deepsparse/server/config.py +++ b/src/deepsparse/server/config.py @@ -17,14 +17,14 @@ from pydantic import BaseModel, Field, validator -from deepsparse import DEEPSPARSE_ENGINE, PipelineConfig +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE, PipelineConfig +from deepsparse.legacy.tasks import SupportedTasks from deepsparse.loggers.config import ( MetricFunctionConfig, PipelineSystemLoggingConfig, SystemLoggingConfig, SystemLoggingGroup, ) -from deepsparse.tasks import SupportedTasks __all__ = [ diff --git a/src/deepsparse/server/deepsparse_server.py b/src/deepsparse/server/deepsparse_server.py index da68d64ee9..a6dffe5346 100644 --- a/src/deepsparse/server/deepsparse_server.py +++ b/src/deepsparse/server/deepsparse_server.py @@ -15,7 +15,7 @@ import logging from functools import partial -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.server.config import EndpointConfig from deepsparse.server.server import CheckReady, ModelMetaData, ProxyPipeline, Server from fastapi import FastAPI diff --git a/src/deepsparse/server/openai_server.py b/src/deepsparse/server/openai_server.py index d32dab0d62..c9f4ef2f16 100644 --- a/src/deepsparse/server/openai_server.py +++ b/src/deepsparse/server/openai_server.py @@ -18,7 +18,7 @@ from http import HTTPStatus from typing import AsyncGenerator, Dict, List, Optional -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.server.config import EndpointConfig from deepsparse.server.helpers import create_error_response from deepsparse.server.output import CompletionOutput, RequestOutput diff --git a/src/deepsparse/tasks.py b/src/deepsparse/tasks.py index b220519571..7d0c8cef64 100644 --- a/src/deepsparse/tasks.py +++ b/src/deepsparse/tasks.py @@ -78,30 +78,6 @@ class SupportedTasks: The supported tasks in the DeepSparse pipeline and system """ - nlp = namedtuple( - "nlp", - [ - "question_answering", - "text_classification", - "token_classification", - "zero_shot_text_classification", - "transformers_embedding_extraction", - ], - )( - question_answering=AliasedTask("question_answering", ["qa"]), - text_classification=AliasedTask( - "text_classification", ["glue", "sentiment_analysis"] - ), - token_classification=AliasedTask("token_classification", ["ner"]), - zero_shot_text_classification=AliasedTask("zero_shot_text_classification", []), - transformers_embedding_extraction=AliasedTask( - "transformers_embedding_extraction", [] - ), - ) - - chat = namedtuple("chat", ["chatbot", "chat"])( - chatbot=AliasedTask("chatbot", []), chat=AliasedTask("chat", []) - ) text_generation = namedtuple( "text_generation", ["text_generation", "opt", "bloom"] )( @@ -109,8 +85,12 @@ class 
SupportedTasks: opt=AliasedTask("opt", []), bloom=AliasedTask("bloom", []), ) - code_generation = namedtuple("code_generation", ["code_generation", "codegen"])( + + code_generation = namedtuple( + "code_generation", ["code_generation", "code_gen", "codegen"] + )( code_generation=AliasedTask("code_generation", []), + code_gen=AliasedTask("code_gen", []), codegen=AliasedTask("codegen", []), ) @@ -121,43 +101,7 @@ class SupportedTasks: ), ) - yolo = namedtuple("yolo", ["yolo"])( - yolo=AliasedTask("yolo", ["yolo"]), - ) - yolov8 = namedtuple("yolov8", ["yolov8"])( - yolov8=AliasedTask("yolov8", ["yolov8"]), - ) - yolact = namedtuple("yolact", ["yolact"])( - yolact=AliasedTask("yolact", ["yolact"]), - ) - - haystack = namedtuple("haystack", ["information_retrieval_haystack"])( - information_retrieval_haystack=AliasedTask( - "information_retrieval_haystack", ["haystack"] - ), - ) - embedding_extraction = namedtuple("embedding_extraction", ["embedding_extraction"])( - embedding_extraction=AliasedTask( - "embedding_extraction", ["embedding_extraction"] - ), - ) - open_pif_paf = namedtuple("open_pif_paf", ["open_pif_paf"])( - open_pif_paf=AliasedTask("open_pif_paf", ["open_pif_paf"]), - ) - - all_task_categories = [ - nlp, - image_classification, - yolo, - yolov8, - yolact, - haystack, - embedding_extraction, - open_pif_paf, - text_generation, - chat, - code_generation, - ] + all_task_categories = [text_generation, code_generation, image_classification] @classmethod def check_register_task( @@ -168,54 +112,16 @@ def check_register_task( :param extra_tasks: valid task names that are not included in supported tasks. i.e. tasks registered to Pipeline at runtime """ - if task == "custom": - # custom task, register the CustomPipeline - import deepsparse.pipelines.custom_pipeline # noqa: F401 - - elif cls.is_text_generation(task): + if cls.is_text_generation(task): import deepsparse.transformers.pipelines.text_generation # noqa: F401 - elif cls.is_chat(task): - import deepsparse.transformers.pipelines.chat # noqa: F401 - elif cls.is_code_generation(task): import deepsparse.transformers.pipelines.code_generation # noqa: F401 - elif cls.is_nlp(task): - # trigger transformers pipelines to register with Pipeline.register - import deepsparse.transformers.pipelines # noqa: F401 - elif cls.is_image_classification(task): # trigger image classification pipelines to # register with Pipeline.register - import deepsparse.image_classification.pipelines # noqa: F401 - - elif cls.is_yolact(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolact.pipelines # noqa: F401 - - elif cls.is_yolo(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolo.pipelines # noqa: F401 - - elif cls.is_yolov8(task): - # trigger yolo pipelines to register with Pipeline.register - import deepsparse.yolov8.pipelines # noqa: F401 - - elif cls.is_haystack(task): - # trigger haystack pipeline as well as transformers pipelines to - # register with Pipeline.register - import deepsparse.transformers.haystack # noqa: F401 - - elif cls.is_embedding_extraction(task): - # trigger embedding_extraction pipelines to register with - # Pipeline.register - import deepsparse.pipelines.embedding_extraction # noqa :F401 - - elif cls.is_open_pif_paf(task): - # trigger embedding_extraction pipelines to register with - # Pipeline.register - import deepsparse.open_pif_paf.pipelines # noqa :F401 + import deepsparse.image_classification.pipeline # noqa: F401 all_tasks = set(cls.task_names() 
+ (list(extra_tasks or []))) if task not in all_tasks: @@ -224,14 +130,6 @@ def check_register_task( f"{list(all_tasks)}" ) - @classmethod - def is_chat(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a chat task - :return: True if it is a chat task, False otherwise - """ - return any(chat_task.matches(task) for chat_task in cls.chat) - @classmethod def is_text_generation(cls, task: str) -> bool: """ @@ -244,37 +142,6 @@ def is_text_generation(cls, task: str) -> bool: for text_generation_task in cls.text_generation ) - @classmethod - def is_code_generation(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a text generation task - such as codegen - :return: True if it is a text generation task, False otherwise - """ - return any( - code_generation_task.matches(task) - for code_generation_task in cls.code_generation - ) - - @classmethod - def is_nlp(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an nlp task - such as question_answering - :return: True if it is an nlp task, False otherwise - """ - return any([nlp_task.matches(task) for nlp_task in cls.nlp]) - - @classmethod - def is_cv(cls, task: str) -> bool: - return ( - cls.is_yolo(task) - or cls.is_yolov8(task) - or cls.is_yolact(task) - or cls.is_image_classification(task) - or cls.is_open_pif_paf(task) - ) - @classmethod def is_image_classification(cls, task: str) -> bool: """ @@ -284,67 +151,9 @@ def is_image_classification(cls, task: str) -> bool: """ return any([ic_task.matches(task) for ic_task in cls.image_classification]) - @classmethod - def is_yolo(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLO - :return: True if it is an segmentation task using YOLO, False otherwise - """ - return any([yolo_task.matches(task) for yolo_task in cls.yolo]) - - @classmethod - def is_yolov8(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLOv8 - :return: True if it is an segmentation task using YOLOv8, False otherwise - """ - return any([yolov8_task.matches(task) for yolov8_task in cls.yolov8]) - - @classmethod - def is_yolact(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is an image - segmentation task using YOLO - :return: True if it is an segmentation task using YOLO, False otherwise - """ - return any([yolact_task.matches(task) for yolact_task in cls.yolact]) - - @classmethod - def is_haystack(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a haystack task - :return: True if it is a haystack task, False otherwise - """ - return any([haystack_task.matches(task) for haystack_task in cls.haystack]) - - @classmethod - def is_embedding_extraction(cls, task): - """ - :param task: the name of the task to check whether it is an - embedding_extraction task - :return: True if it is an embedding_extraction task, False otherwise - """ - return any( - embedding_extraction_task.matches(task) - for embedding_extraction_task in cls.embedding_extraction - ) - - @classmethod - def is_open_pif_paf(cls, task): - """ - :param task: the name of the task to check whether it is an - embedding_extraction task - :return: True if it is an open_pif_paf task, False otherwise - """ - return any( - open_pif_paf_task.matches(task) for open_pif_paf_task in cls.open_pif_paf - ) - @classmethod def task_names(cls): - 
task_names = ["custom"] + task_names = [] for task_category in cls.all_task_categories: for task in task_category: unique_aliases = ( @@ -353,6 +162,18 @@ def task_names(cls): task_names += (task._name, *unique_aliases) return task_names + @classmethod + def is_code_generation(cls, task: str) -> bool: + """ + :param task: the name of the task to check whether it is a text generation task + such as codegen + :return: True if it is a text generation task, False otherwise + """ + return any( + code_generation_task.matches(task) + for code_generation_task in cls.code_generation + ) + def dynamic_import_task(module_or_path: str) -> str: """ diff --git a/src/deepsparse/transformers/engines/nl_decoder_engine.py b/src/deepsparse/transformers/engines/nl_decoder_engine.py index 99ab552660..1886b5a013 100644 --- a/src/deepsparse/transformers/engines/nl_decoder_engine.py +++ b/src/deepsparse/transformers/engines/nl_decoder_engine.py @@ -18,7 +18,7 @@ import numpy from deepsparse.engine import Context -from deepsparse.pipeline import DEEPSPARSE_ENGINE, create_engine +from deepsparse.legacy.pipeline import DEEPSPARSE_ENGINE, create_engine from deepsparse.transformers.utils.decoder_kv_cache import DecoderKVCache from deepsparse.transformers.utils.timings import TextGenerationTimings from deepsparse.utils import TimerManager diff --git a/src/deepsparse/transformers/haystack/pipeline.py b/src/deepsparse/transformers/haystack/pipeline.py index 84aaa74c52..9fc6f28185 100644 --- a/src/deepsparse/transformers/haystack/pipeline.py +++ b/src/deepsparse/transformers/haystack/pipeline.py @@ -38,7 +38,7 @@ from haystack.schema import Document from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.transformers import haystack as DeepSparseHaystack diff --git a/src/deepsparse/transformers/pipelines/chat.py b/src/deepsparse/transformers/pipelines/chat.py index 7f4497f88e..7a24bf070d 100644 --- a/src/deepsparse/transformers/pipelines/chat.py +++ b/src/deepsparse/transformers/pipelines/chat.py @@ -20,12 +20,14 @@ import numpy from pydantic import Field, validator -from deepsparse import Pipeline -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.legacy import Pipeline +from deepsparse.legacy.transformers.pipelines.text_generation import ( + TextGenerationPipeline, +) +from deepsparse.transformers.schemas.text_generation_schemas import ( FinishReason, TextGenerationInput, TextGenerationOutput, - TextGenerationPipeline, ) from deepsparse.transformers.utils import ( DecoderKVCache, diff --git a/src/deepsparse/transformers/pipelines/code_generation.py b/src/deepsparse/transformers/pipelines/code_generation.py index ffbb7387d4..545d1b689e 100644 --- a/src/deepsparse/transformers/pipelines/code_generation.py +++ b/src/deepsparse/transformers/pipelines/code_generation.py @@ -13,17 +13,14 @@ # limitations under the License. 
-from deepsparse import Pipeline +from deepsparse.operators import OperatorRegistry from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline __all__ = ["CodeGenerationPipeline"] -@Pipeline.register( - task="code_generation", - task_aliases=["codegen"], -) +@OperatorRegistry.register(name=["code_generation", "code_gen", "codegen"]) class CodeGenerationPipeline(TextGenerationPipeline): """ Subclass of text generation pipeline to support any defaults or diff --git a/src/deepsparse/transformers/pipelines/embedding_extraction.py b/src/deepsparse/transformers/pipelines/embedding_extraction.py index 1c33f68697..9429a2fc73 100644 --- a/src/deepsparse/transformers/pipelines/embedding_extraction.py +++ b/src/deepsparse/transformers/pipelines/embedding_extraction.py @@ -41,7 +41,7 @@ from pydantic import BaseModel, Field from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.log import get_main_logger from deepsparse.transformers.helpers import truncate_transformer_onnx_model from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/pipeline.py b/src/deepsparse/transformers/pipelines/pipeline.py index 0d54449e56..393d5d449c 100644 --- a/src/deepsparse/transformers/pipelines/pipeline.py +++ b/src/deepsparse/transformers/pipelines/pipeline.py @@ -26,7 +26,7 @@ import transformers from transformers.models.auto import AutoTokenizer -from deepsparse import Bucketable, Pipeline +from deepsparse.legacy import Bucketable, Pipeline from deepsparse.transformers.helpers import ( get_deployment_path, overwrite_transformer_onnx_model_inputs, diff --git a/src/deepsparse/transformers/pipelines/question_answering.py b/src/deepsparse/transformers/pipelines/question_answering.py index 7a60a2ddc8..dfd82e0e82 100644 --- a/src/deepsparse/transformers/pipelines/question_answering.py +++ b/src/deepsparse/transformers/pipelines/question_answering.py @@ -44,7 +44,7 @@ from pydantic import BaseModel, Field from transformers.data import SquadExample -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/text_classification.py b/src/deepsparse/transformers/pipelines/text_classification.py index 1ceea46235..43bc22edd3 100644 --- a/src/deepsparse/transformers/pipelines/text_classification.py +++ b/src/deepsparse/transformers/pipelines/text_classification.py @@ -42,7 +42,7 @@ from pydantic import BaseModel, Field from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/v2/text_generation/__init__.py b/src/deepsparse/transformers/pipelines/text_generation/__init__.py similarity index 100% rename from src/deepsparse/v2/text_generation/__init__.py rename to src/deepsparse/transformers/pipelines/text_generation/__init__.py diff --git a/src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py b/src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py similarity index 97% rename from src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py 
index 17d8dd662c..9fb17f3946 100644 --- a/src/deepsparse/v2/text_generation/autoregressive_preprocess_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/autoregressive_preprocess_operator.py @@ -15,9 +15,9 @@ import logging from typing import Any +from deepsparse.operators import Operator from deepsparse.transformers.utils.helpers import compute_engine_inputs -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/compile_generated_tokens.py b/src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py similarity index 94% rename from src/deepsparse/v2/text_generation/compile_generated_tokens.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py index 630067f8c3..3cd16e2888 100644 --- a/src/deepsparse/v2/text_generation/compile_generated_tokens.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_generated_tokens.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.utils import InferenceState __all__ = ["CompileGeneratedTokens"] diff --git a/src/deepsparse/v2/text_generation/compile_generations.py b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py similarity index 91% rename from src/deepsparse/v2/text_generation/compile_generations.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_generations.py index ed8297ac01..2187e525a1 100644 --- a/src/deepsparse/v2/text_generation/compile_generations.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_generations.py @@ -16,9 +16,9 @@ import numpy from pydantic import BaseModel, Field -from deepsparse.transformers.pipelines.text_generation import FinishReason -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason +from deepsparse.utils import InferenceState __all__ = ["CompileGenerations", "CompileGenerationsOutput"] diff --git a/src/deepsparse/v2/text_generation/compile_logits.py b/src/deepsparse/transformers/pipelines/text_generation/compile_logits.py similarity index 89% rename from src/deepsparse/v2/text_generation/compile_logits.py rename to src/deepsparse/transformers/pipelines/text_generation/compile_logits.py index 48a7158f66..7785880980 100644 --- a/src/deepsparse/v2/text_generation/compile_logits.py +++ b/src/deepsparse/transformers/pipelines/text_generation/compile_logits.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import ( + NLEngineOutputs, +) +from deepsparse.utils import InferenceState __all__ = ["CompilePromptLogits"] diff --git a/src/deepsparse/v2/text_generation/generate_new_token.py b/src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py similarity index 92% rename from src/deepsparse/v2/text_generation/generate_new_token.py rename to src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py index 5bf48bbdbc..2ff21af54f 100644 --- a/src/deepsparse/v2/text_generation/generate_new_token.py +++ b/src/deepsparse/transformers/pipelines/text_generation/generate_new_token.py @@ -15,10 +15,12 @@ import transformers -from deepsparse.transformers.pipelines.text_generation import FinishReason -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs -from deepsparse.v2.utils import InferenceState +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import ( + NLEngineOutputs, +) +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason +from deepsparse.utils import InferenceState __all__ = ["GenerateNewTokenOperator"] diff --git a/src/deepsparse/v2/text_generation/join_output.py b/src/deepsparse/transformers/pipelines/text_generation/join_output.py similarity index 93% rename from src/deepsparse/v2/text_generation/join_output.py rename to src/deepsparse/transformers/pipelines/text_generation/join_output.py index 8a6c77a2f1..b8176c19db 100644 --- a/src/deepsparse/v2/text_generation/join_output.py +++ b/src/deepsparse/transformers/pipelines/text_generation/join_output.py @@ -16,9 +16,11 @@ import numpy +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation.compile_generations import ( + CompileGenerationsOutput, +) from deepsparse.transformers.utils.helpers import pad_to_fixed_length -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation.compile_generations import CompileGenerationsOutput __all__ = ["JoinOutput"] diff --git a/src/deepsparse/v2/text_generation/kv_cache_operator.py b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py similarity index 98% rename from src/deepsparse/v2/text_generation/kv_cache_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py index 3c15d0ff5a..7dd171c625 100644 --- a/src/deepsparse/v2/text_generation/kv_cache_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/kv_cache_operator.py @@ -16,12 +16,12 @@ from pydantic import BaseModel, Field +from deepsparse.operators import Operator from deepsparse.transformers.utils import DecoderKVCache from deepsparse.transformers.utils.helpers import ( initialize_kv_cache_state, prepends_bos_token, ) -from deepsparse.v2.operators import Operator __all__ = ["KVCacheCreator", "KVCacheCreatorInput"] diff --git a/src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py b/src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py similarity index 96% rename from src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py rename to 
src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py index 513c34dfc2..dca4fc3ff9 100644 --- a/src/deepsparse/v2/text_generation/multi_engine_prefill_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/multi_engine_prefill_operator.py @@ -15,9 +15,9 @@ import logging from typing import Any +from deepsparse.operators import Operator from deepsparse.transformers.utils.helpers import compute_engine_inputs -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/nl_engine_operator.py b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py similarity index 98% rename from src/deepsparse/v2/text_generation/nl_engine_operator.py rename to src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py index d8c80bbaee..d77fbf68df 100644 --- a/src/deepsparse/v2/text_generation/nl_engine_operator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/nl_engine_operator.py @@ -20,16 +20,16 @@ import numpy from pydantic import BaseModel, Field +from deepsparse.operators.engine_operator import ( + DEEPSPARSE_ENGINE, + EngineOperator, + EngineOperatorInputs, +) from deepsparse.utils import join_engine_outputs, split_engine_inputs from deepsparse.utils.onnx import ( CACHE_INPUT_PREFIX, overwrite_onnx_model_inputs_for_kv_cache_models, ) -from deepsparse.v2.operators.engine_operator import ( - DEEPSPARSE_ENGINE, - EngineOperator, - EngineOperatorInputs, -) __all__ = ["NLEngineOperator", "NLEngineInputs"] @@ -130,7 +130,9 @@ def __init__( self.internal_kv_cache = internal_kv_cache self.model_path = kwargs.get("model_path") (onnx_file_path, additional_outputs) = self.override_model_inputs( - self.model_path, batch_size=1, return_additional_outputs=True + self.model_path, + batch_size=kwargs.get("batch_size", 1), + return_additional_outputs=True, ) output_indices_to_be_cached, kv_cache_data_type, = additional_outputs.get( "output_indices_to_be_cached" diff --git a/src/deepsparse/v2/text_generation/pipeline.py b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py similarity index 93% rename from src/deepsparse/v2/text_generation/pipeline.py rename to src/deepsparse/transformers/pipelines/text_generation/pipeline.py index 6e27942d19..30cbe99081 100644 --- a/src/deepsparse/v2/text_generation/pipeline.py +++ b/src/deepsparse/transformers/pipelines/text_generation/pipeline.py @@ -13,17 +13,15 @@ # limitations under the License. 
import logging -from typing import Dict, List, Optional +from typing import List, Optional +from deepsparse.operators import EngineOperator +from deepsparse.operators.registry import OperatorRegistry +from deepsparse.pipeline import Pipeline +from deepsparse.routers import GraphRouter +from deepsparse.schedulers import ContinuousBatchingScheduler, OperatorScheduler from deepsparse.transformers.helpers import setup_transformers_pipeline -from deepsparse.transformers.utils.helpers import process_generation_config -from deepsparse.utils import split_engine_inputs -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.operators.registry import OperatorRegistry -from deepsparse.v2.pipeline import Pipeline -from deepsparse.v2.routers import GraphRouter -from deepsparse.v2.schedulers import ContinuousBatchingScheduler, OperatorScheduler -from deepsparse.v2.text_generation import ( +from deepsparse.transformers.pipelines.text_generation import ( AutoRegressiveOperatorPreprocess, CompileGeneratedTokens, CompileGenerations, @@ -39,7 +37,8 @@ ProcessOutputs, TokenGeneratorOperator, ) -from deepsparse.v2.utils import PipelineState +from deepsparse.transformers.utils.helpers import process_generation_config +from deepsparse.utils import PipelineState, split_engine_inputs _LOGGER = logging.getLogger(__name__) @@ -56,7 +55,7 @@ def __init__( force_max_tokens: bool = False, generation_config=None, continuous_batch_sizes: Optional[List[int]] = None, - engine_kwargs: Optional[Dict] = None, + **engine_kwargs, ): ( self.model_path, @@ -145,8 +144,8 @@ def __init__( continuous_batching_scheduler = None if continuous_batch_sizes: if internal_kv_cache: - _LOGGER.warn( - "internal kv_cache is not supported with continuous_batching " + _LOGGER.warning( + "continuous_batching is not supported with internal_kv_cache" ) else: continuous_batching_scheduler = self._get_continuous_batching_scheduler( diff --git a/src/deepsparse/v2/text_generation/prep_for_generation.py b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py similarity index 93% rename from src/deepsparse/v2/text_generation/prep_for_generation.py rename to src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py index 0ea4a06a02..0ac010aedf 100644 --- a/src/deepsparse/v2/text_generation/prep_for_generation.py +++ b/src/deepsparse/transformers/pipelines/text_generation/prep_for_generation.py @@ -16,11 +16,11 @@ import numpy -from deepsparse.transformers.pipelines.text_generation import FinishReason +from deepsparse.operators import Operator +from deepsparse.transformers.pipelines.text_generation import TokenGeneratorOperator +from deepsparse.transformers.schemas.text_generation_schemas import FinishReason from deepsparse.transformers.utils.helpers import set_generated_length -from deepsparse.v2.operators import Operator -from deepsparse.v2.text_generation import TokenGeneratorOperator -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState __all__ = ["PrepareGeneration"] diff --git a/src/deepsparse/v2/text_generation/prep_for_prefill.py b/src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py similarity index 96% rename from src/deepsparse/v2/text_generation/prep_for_prefill.py rename to src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py index 2e5fecb3e8..47b4965daf 100644 --- a/src/deepsparse/v2/text_generation/prep_for_prefill.py +++ b/src/deepsparse/transformers/pipelines/text_generation/prep_for_prefill.py @@ -15,8 
+15,8 @@ import logging from typing import Any -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import PipelineState +from deepsparse.operators import Operator +from deepsparse.utils import PipelineState _LOGGER = logging.getLogger(__name__) diff --git a/src/deepsparse/v2/text_generation/process_inputs.py b/src/deepsparse/transformers/pipelines/text_generation/process_inputs.py similarity index 97% rename from src/deepsparse/v2/text_generation/process_inputs.py rename to src/deepsparse/transformers/pipelines/text_generation/process_inputs.py index 0f9147f916..05e93a9cc6 100644 --- a/src/deepsparse/v2/text_generation/process_inputs.py +++ b/src/deepsparse/transformers/pipelines/text_generation/process_inputs.py @@ -17,7 +17,8 @@ import transformers -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import ( GenerationDefaults, TextGenerationInput, ) @@ -26,7 +27,6 @@ override_config, repeat_inputs, ) -from deepsparse.v2.operators import Operator __all__ = ["ProcessInputsTextGeneration"] diff --git a/src/deepsparse/v2/text_generation/process_outputs.py b/src/deepsparse/transformers/pipelines/text_generation/process_outputs.py similarity index 94% rename from src/deepsparse/v2/text_generation/process_outputs.py rename to src/deepsparse/transformers/pipelines/text_generation/process_outputs.py index 7173b8e256..15434175b9 100644 --- a/src/deepsparse/v2/text_generation/process_outputs.py +++ b/src/deepsparse/transformers/pipelines/text_generation/process_outputs.py @@ -16,13 +16,13 @@ import numpy -from deepsparse.transformers.pipelines.text_generation import ( +from deepsparse.operators import Operator +from deepsparse.transformers.schemas.text_generation_schemas import ( FinishReason, GeneratedText, TextGenerationOutput, ) -from deepsparse.v2.operators import Operator -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState class ProcessOutputs(Operator): diff --git a/src/deepsparse/v2/text_generation/token_generator.py b/src/deepsparse/transformers/pipelines/text_generation/token_generator.py similarity index 96% rename from src/deepsparse/v2/text_generation/token_generator.py rename to src/deepsparse/transformers/pipelines/text_generation/token_generator.py index 9148d71cc8..3f46abd86f 100644 --- a/src/deepsparse/v2/text_generation/token_generator.py +++ b/src/deepsparse/transformers/pipelines/text_generation/token_generator.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from deepsparse.operators import Operator from deepsparse.transformers.utils.token_generator import TokenGenerator -from deepsparse.v2.operators import Operator __all__ = ["TokenGeneratorOperator"] diff --git a/src/deepsparse/transformers/pipelines/token_classification.py b/src/deepsparse/transformers/pipelines/token_classification.py index 66957fce97..4c719ee0c2 100644 --- a/src/deepsparse/transformers/pipelines/token_classification.py +++ b/src/deepsparse/transformers/pipelines/token_classification.py @@ -40,7 +40,7 @@ from transformers.file_utils import ExplicitEnum from transformers.tokenization_utils_base import PaddingStrategy, TruncationStrategy -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py b/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py index be24d0cd7d..bf0faa5c0f 100644 --- a/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py +++ b/src/deepsparse/transformers/pipelines/zero_shot_text_classification.py @@ -58,7 +58,7 @@ from pydantic import BaseModel, Field -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.transformers.pipelines import TransformersPipeline diff --git a/src/deepsparse/transformers/pipelines_cli.py b/src/deepsparse/transformers/pipelines_cli.py index 9243fdf5d2..887af4c594 100644 --- a/src/deepsparse/transformers/pipelines_cli.py +++ b/src/deepsparse/transformers/pipelines_cli.py @@ -86,7 +86,7 @@ from pydantic import BaseModel from deepsparse import Pipeline -from deepsparse.pipeline import SUPPORTED_PIPELINE_ENGINES +from deepsparse.operators.engine_operator import SUPPORTED_PIPELINE_ENGINES from deepsparse.transformers import fix_numpy_types from deepsparse.transformers.loaders import SUPPORTED_EXTENSIONS, get_batch_loader diff --git a/src/deepsparse/transformers/schemas/__init__.py b/src/deepsparse/transformers/schemas/__init__.py new file mode 100644 index 0000000000..e59f70d938 --- /dev/null +++ b/src/deepsparse/transformers/schemas/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .text_generation_schemas import * diff --git a/src/deepsparse/transformers/schemas/text_generation_schemas.py b/src/deepsparse/transformers/schemas/text_generation_schemas.py new file mode 100644 index 0000000000..7e657f1098 --- /dev/null +++ b/src/deepsparse/transformers/schemas/text_generation_schemas.py @@ -0,0 +1,167 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import pathlib +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Sequence, Union + +from pydantic import BaseModel, Field +from transformers import GenerationConfig + + +# Based off of https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig # noqa E501 +class GenerationDefaults: + # Parameters that control the length of the output + max_length = None + max_new_tokens = 100 + # Parameters that control the generation strategy used + do_sample = False + # Parameters for manipulation of the model output logits + temperature = 1.0 + top_k = 50 + top_p = 1.0 + repetition_penalty = 1.0 + # Parameters that define the outputs + num_return_sequences = 1 + output_scores = False + + +class FinishReason(Enum): + STOP = "stop" + LENGTH = "length" + TIME = "time" + CALLBACK = "callback" + CAPACITY = "capacity" + MAX_NEW_TOKENS = "max_new_tokens" + + +class TextGenerationInput(BaseModel): + class Config: + arbitrary_types_allowed = True + + sequences: Union[str, List[str]] = Field( + alias="prompt", + description="The input sequences to generate the text from.", + ) + return_input_tokens: bool = Field( + default=False, + description="A flag that indicates whether to return " "the input_tokens. ", + ) + include_prompt_logits: bool = Field( + default=False, + description="A flag that indicates whether to return " + "the logits for the prompt. If set, prompt_logits are " + "`prepended` to the logits for the generated text sequence." + "Note: This flag is only applicable when output_scores " + "is `True`.", + ) + fixed_sequences_length: bool = Field( + default=False, + description="A flag that indicates whether to modify " + "(pad or truncate) each input text sequence, so that " + "its tokenized length is equal to `sequence_length` " + "of tokens. Useful, when a batch of predictions needs " + "to have consistent length so one " + "can compute metric in a batched fashion. ", + ) + streaming: bool = Field( + default=False, + description="Whether to stream the results back as they are generated. If " + "True, then the results are returned as a generator object which yields " + "the results as they are generated. If False, then the results are returned " + "as a list after it has completed.", + ) + callback: Optional[Callable[[Any], Union[bool, Any]]] = Field( + default=None, + description="Callable that will be invoked " + "on each generated token. If the callable returns " + "`False`, the generation will stop. Default is `None`.", + ) + stop: Union[None, str, Sequence[str]] = Field( + default=None, + description="A string or a list of strings that will be used as" + " stop tokens. (token generation will stop when any of the stop" + " tokens is generated). Set to `None` to ignore this parameter." + " Default is `None`.", + ) + + presence_penalty: Optional[float] = Field( + default=0.0, + description="Penalty applied for generating new token. Any existing" + " token results in the subtraction of its corresponding logit value." 
+ " Default set to 0.0", + ) + + generation_config: Union[None, str, pathlib.Path, Dict, GenerationConfig] = Field( + default=None, + description="GenerationConfig file consisting of parameters used to control " + "sequences generated for each prompt. The current supported parameters are: " + "max_length, max_new_tokens, num_return_sequences, output_scores, top_p, " + "top_k, repetition_penalty, do_sample, temperature. If None is provided, " + "deepsparse defaults will be used. For all other input types, HuggingFace " + "defaults for GenerationConfig will be used. ", + ) + + generation_kwargs: Optional[Dict] = Field( + default=None, + description="Any arguments to override generation_config arguments. Refer to " + "the generation_config argument for a full list of supported variables.", + ) + + +class GeneratedText(BaseModel): + text: str = Field( + description="The generated sequence for a given prompt. If " + "streaming is enabled, this will be the next generated token." + ) + score: Optional[Any] = Field( + default=None, + description="The score for the generated token or sequence. " + "The scores have the shape [sequence_length, vocab_size]", + ) + finished: bool = Field(description="Whether generation has stopped.") + finished_reason: Optional[str] = Field( + default=None, + description="The reason for generation to stop. " + "Defined by FinishReason. One of stop, length, or time.", + ) + + +# TODO: Pydantic aliases allow assignment but not reference. Still need to update. +class TextGenerationOutput(BaseModel): + created: datetime.datetime = Field(description="Time of inference creation.") + prompts: Union[str, List[str]] = Field( + description="Prompts used for the sequence generation. For multiple input " + "prompts, a list of prompts is returned" + ) + generations: Union[List[GeneratedText], List[List[GeneratedText]]] = Field( + description="For a single prompt, a single list of GeneratedText is returned. " + "If multiple prompts are given, a list of GeneratedText is returned for each " + "prompt provided. If streamng is enabled, the next generated token is returned." + "Otherwise, the full generated sequence is returned." + ) + input_tokens: Optional[ + Any + ] = Field( # dictionary mapping "token_ids" and "attention_mask" to numpy arrays + default=None, + description="The output of the tokenizer." 
+ "Dictionary containing token_ids and attention_mask, " + "both mapping to arrays of size " + "[batch_size, sequence_length]", + ) + + class Config: + arbitrary_types_allowed = True + extra = "allow" diff --git a/src/deepsparse/utils/__init__.py b/src/deepsparse/utils/__init__.py index 8ad6b624da..dafa92b7ed 100644 --- a/src/deepsparse/utils/__init__.py +++ b/src/deepsparse/utils/__init__.py @@ -16,5 +16,9 @@ from .cli_helpers import * from .data import * +from .helpers import * from .onnx import * +from .state import * +from .subgraph import * from .timer import * +from .types import * diff --git a/src/deepsparse/v2/utils/helpers.py b/src/deepsparse/utils/helpers.py similarity index 100% rename from src/deepsparse/v2/utils/helpers.py rename to src/deepsparse/utils/helpers.py diff --git a/src/deepsparse/v2/utils/state.py b/src/deepsparse/utils/state.py similarity index 100% rename from src/deepsparse/v2/utils/state.py rename to src/deepsparse/utils/state.py diff --git a/src/deepsparse/v2/utils/data.py b/src/deepsparse/utils/subgraph.py similarity index 96% rename from src/deepsparse/v2/utils/data.py rename to src/deepsparse/utils/subgraph.py index 9ed340cb7c..d20717dcd7 100644 --- a/src/deepsparse/v2/utils/data.py +++ b/src/deepsparse/utils/subgraph.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from typing import Any, List -from deepsparse.v2.utils import InferenceState +from deepsparse.utils import InferenceState __all__ = ["SubGraph"] diff --git a/src/deepsparse/v2/utils/types.py b/src/deepsparse/utils/types.py similarity index 100% rename from src/deepsparse/v2/utils/types.py rename to src/deepsparse/utils/types.py diff --git a/src/deepsparse/v2/pipeline.py b/src/deepsparse/v2/pipeline.py deleted file mode 100644 index 40d41c586e..0000000000 --- a/src/deepsparse/v2/pipeline.py +++ /dev/null @@ -1,378 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import asyncio -import copy -from typing import Any, Dict, List, Optional, Union - -from deepsparse.v2.operators import EngineOperator, Operator -from deepsparse.v2.routers import Router -from deepsparse.v2.schedulers import ( - ContinuousBatchingScheduler, - OperatorScheduler, - SchedulerGroup, -) -from deepsparse.v2.utils import InferenceState, PipelineState -from deepsparse.v2.utils.data import SubGraph -from deepsparse.v2.utils.helpers import run_func - - -__all__ = ["Pipeline"] - - -class Pipeline(Operator): - """ - Pipeline accepts a series of operators, schedulers, and a router. Calling a pipeline - will use the router to run through all the defined operators. The operators should - be implemented using the Operator class and each implemented operator should be - responsible for a functional component of the pipelines. The flow of inputs/outputs - between the operators and the steps in the pipeline should be defined by the router, - (based off of the Router class), which dicates the next operator in the pipeline. 
- Execution of the operators will be handled by the provided schedulers. - - :param ops: Operators to run within the pipeline. Can either be a list of operators - or dictionary of operators. - :param router: A Router which dictates the next operator to call. - :param schedulers: A list of schedulers to run operators. - :param pipeline_state: pipeline_state created during pipeline initialization - - """ - - def __init__( - self, - ops: Union[Dict[str, Operator], List[Operator]], - router: Router, - schedulers: List[OperatorScheduler], - continuous_batching_scheduler: Optional[ContinuousBatchingScheduler] = None, - pipeline_state: Optional[PipelineState] = None, - ): - - self.ops = ops - self.router = router - self.schedulers = schedulers - self.pipeline_state = pipeline_state - self._continuous_batching_scheduler = continuous_batching_scheduler - self.validate() - - self._scheduler_group = SchedulerGroup(self.schedulers) - - def _run_next( - self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs - ): - if ( - isinstance(self.ops[next_step], EngineOperator) - and self._continuous_batching_scheduler - ): - func = self._continuous_batching_scheduler.submit - inp = self.ops[next_step].input_schema(**inp) - else: - func = self._scheduler_group.submit - - return run_func( - func=func, - operator=self.ops[next_step], - inp=inp, - pipeline_state=self.pipeline_state, - inference_state=inference_state, - **kwargs, - ) - - async def _run_sub_graphs( - self, - sub_graph_inputs: List[Any], - sub_graphs: List[SubGraph], - loop: Optional[asyncio.AbstractEventLoop] = None, - ) -> List[Any]: - """ - Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is - still running but has completed its current step. Schedules the next step - subgraph step. This is repeated until all subgraphs have finished running and - have reached their end step (stored in the Subgraph.end attribute). - - :param sub_graph_inputs: A list of inputs that should be passed to each - subgraph. Each subgraph is given an element of the list as input to its - first node. - :param sub_graphs: A list of Subgraph objects. Each stores the relevant - execution information for the particular subgraph, such as its current step - in the sub graph, inference state, output, and end step. - - :returns: a list of outputs for all the completed Subgraph objects. Returned - in the same order that the subgraphs were passed to the function. - """ - for i in range(len(sub_graphs)): - sub_graphs[i].output = self._run_next( - sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop - ) - - # Execute all sub graphs until all graphs have been completed. - while any(not x.completed for x in sub_graphs): - for sub_graph in sub_graphs: - if not sub_graph.completed: - # get the result for the completed operator; resolve its output - if isinstance(sub_graph.output, asyncio.Future): - await sub_graph.output - operator_output = sub_graph.output.result() - operator_output = sub_graph.parse_output(operator_output) - - # determine the next step for the particular operator, using - # its previous output and previously stored step - next_step = self.router.next( - sub_graph.step, self.ops, operator_output - ) - # update the step - sub_graph.step = next_step - - # store the output for the next step. If the next step is - # end step, this particular route has completed. 
Simply - # update the output value - if next_step in sub_graph.end: - sub_graph.output = operator_output - sub_graph.completed = True - else: - sub_graph.output = self._run_next( - inp=operator_output, - inference_state=sub_graph.inf, - next_step=next_step, - loop=loop, - ) - - return [x.output for x in sub_graphs] - - async def run_async(self, *args, inference_state: InferenceState, **kwargs): - """ - Run through the operators using the provided router and scheduler. - The input to a given operator is the output of the previous operator. - - :param inference_state: inference_state for the pipeline. - :param pipeline_state: pipeline_state for the pipeline. The values in the state - are created during pipeline creation and are read-only during inference. - """ - loop = asyncio.get_running_loop() - - next_step = self.router.START_ROUTE - operator_output = None - - while next_step != self.router.END_ROUTE: - # Either a dictionary key or valid index - - if next_step == self.router.SPLIT_ROUTE: - if operator_output is None: - raise ValueError( - f"{self.router.SPLIT_ROUTE} should appear after " - f"{self.ROUTER.START_ROUTE}" - ) - - operator_output = await self._apply_split( - operator_output, inference_state, loop=loop - ) - next_step = self.router.route[self.router.JOIN_ROUTE] - if next_step == self.router.END_ROUTE: - return operator_output - - if next_step == self.router.START_ROUTE: - outputs = run_func( - *args, - func=self._scheduler_group.submit, - operator=self.ops[next_step], - inference_state=inference_state, - pipeline_state=self.pipeline_state, - loop=loop, - **kwargs, - ) - await outputs - operator_output = outputs.result() - - else: - outputs = self._run_next( - inp=operator_output, - next_step=next_step, - inference_state=inference_state, - loop=loop, - ) - await outputs - operator_output = outputs.result() - - if isinstance(operator_output, tuple): - state_update = operator_output[-1] - operator_output = operator_output[0] - - next_step = self.router.next(next_step, self.ops, operator_output) - if state_update: - inference_state.update_state(state_update) - return operator_output - - async def _apply_split( - self, - inp: Any, - inference_state: InferenceState, - loop: Optional[asyncio.AbstractEventLoop] = None, - ): - batches, orig_batch_size = self.expand_inputs(inp, 1) - - # Create a list of SplitRoutes, per batch size 1 - # Each SplitRoute object holds information about the particular path it - # follows. All start at the same step defined by SPLIT_ROUTE and start - # with the same inference_state. - split_graphs = [ - SubGraph( - inf=copy.deepcopy(inference_state), - step=self.router.route[self.router.SPLIT_ROUTE], - end=[self.router.JOIN_ROUTE], - ) - for i in range(len(batches)) - ] - - outputs = await self._run_sub_graphs( - sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop - ) - return self.condense_inputs(outputs) - - @staticmethod - def create(task: str, **kwargs) -> "Pipeline": - """ - :param task: Pipeline task - :param kwargs: extra task specific kwargs to be passed to the Pipeline - :return: pipeline object initialized for the given task - """ - pipeline = Operator.create(task=task, **kwargs) - if not isinstance(pipeline, Pipeline): - raise RuntimeError( - "Pipeline was not created for the given task. The " - "provided task should be registered using the OperatorRegistry" - ) - return pipeline - - def run( - self, - *args, - inference_state: InferenceState, - **kwargs, - ): - """ - Run through the operators using the provided router and scheduler. 
- The input to a given operator is the output of the previous operator. - - :param inference_state: inference_state for the pipeline. - :param pipeline_state: pipeline_state for the pipeline. The values in the state - are created during pipeline creation and are read-only during inference. - """ - next_step = self.router.START_ROUTE - operator_output = None - while next_step != self.router.END_ROUTE: - - # Split Grap Execution (i.e multiple subgraphs) - # NOTE: split_route should only appear after the start route node - if next_step == self.router.SPLIT_ROUTE: - if operator_output is None: - raise ValueError( - f"{self.router.SPLIT_ROUTE} should appear after " - f"{self.router.START_ROUTE}" - ) - - operator_output = asyncio.run( - self._apply_split(operator_output, inference_state) - ) - next_step = self.router.route[self.router.JOIN_ROUTE] - if next_step == self.router.END_ROUTE: - return operator_output - - if next_step == self.router.START_ROUTE: - operator_output = run_func( - *args, - func=self._scheduler_group.submit, - operator=self.ops[next_step], - inference_state=inference_state, - pipeline_state=self.pipeline_state, - **kwargs, - ).result() - - if isinstance(operator_output, tuple): - operator_output, state_update = ( - operator_output[0], - operator_output[-1], - ) - inference_state.update_state(state_update) - - next_step = self.router.next(next_step, self.ops, operator_output) - - else: - # Single graph execution - graph = SubGraph( - inf=copy.deepcopy(inference_state), - step=next_step, - end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], - ) - - operator_output = asyncio.run( - self._run_sub_graphs( - sub_graph_inputs=[operator_output], sub_graphs=[graph] - ) - )[0] - - inference_state = graph.inf - next_step = graph.step - - return operator_output - - def __call__(self, *args, **kwargs): - """ - Consolidate any provided inference_state or pipeline_state objects and pass - any other operator inputs to run(). - - :return: output of the pipeline operators ran with the router for the given - input - """ - if kwargs.get("inference_state"): - inference_state = kwargs.pop("inference_state") - else: - inference_state = InferenceState() - inference_state.create_state({}) - - kwargs["inference_state"] = inference_state - - return self.run(*args, **kwargs) - - def expand_inputs(self, *args, **kwargs): - """ - Generic function to handle expanding values. - """ - raise NotImplementedError( - "This function should be implemented for any router with split or join" - "nodes. expand_inputs will be called prior to the split node (stored in " - "the router's SPLIT_ROUTE attribute), expanding outputs for each output " - "such that there is a batch size of one per thread." - ) - - def condense_inputs(self, *args, **kwargs): - """ - Generic function to handle condensing values. - """ - raise NotImplementedError( - "This function should be implemented for any router with split or join " - "nodes. condense_inputs will be called after the join node (stored in the " - "router's JOIN_ROUTE attribute), condensing outputs from multiple threads." - ) - - def validate(self): - """ - Validate that compatability of the router and operators provided. 
- """ - router_validation = self.router.validate(self.ops) - - if router_validation is False: - # default error message - op_types = [type(op) for op in self.ops] - raise ValueError(f"Invalid Router: {type(self.router)} for ops: {op_types}") - elif isinstance(router_validation, str): - raise ValueError(f"Invalid Router for operators: {router_validation}") diff --git a/src/deepsparse/v2/task.py b/src/deepsparse/v2/task.py deleted file mode 100644 index f1f4fc6d66..0000000000 --- a/src/deepsparse/v2/task.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Classes and implementations for supported tasks in the DeepSparse pipeline and system -""" - -import importlib -import logging -import os -import sys -from collections import namedtuple -from typing import Iterable, List, Optional, Tuple - - -_LOGGER = logging.getLogger(__name__) - -__all__ = ["SupportedTasks", "AliasedTask"] - - -class AliasedTask: - """ - A task that can have multiple aliases to match to. - For example, question_answering which can alias to qa as well - - :param name: the name of the task such as question_answering or text_classification - :param aliases: the aliases the task can go by in addition to the name such as - qa, glue, sentiment_analysis, etc - """ - - def __init__(self, name: str, aliases: List[str]): - self._name = name - self._aliases = aliases - - @property - def name(self) -> str: - """ - :return: the name of the task such as question_answering - """ - return self._name - - @property - def aliases(self) -> List[str]: - """ - :return: the aliases the task can go by such as qa, glue, sentiment_analysis - """ - return self._aliases - - def matches(self, task: str) -> bool: - """ - :param task: the name of the task to check whether the given instance matches. - Checks the current name as well as any aliases. - Everything is compared at lower case and "-" and whitespace - are replaced with "_". - :return: True if task does match the current instance, False otherwise - """ - task = task.lower().replace("-", "_") - - # replace whitespace with "_" - task = "_".join(task.split()) - - return task == self.name or task in self.aliases - - -class SupportedTasks: - """ - The supported tasks in the DeepSparse pipeline and system - """ - - text_generation = namedtuple( - "text_generation", ["text_generation", "opt", "bloom"] - )( - text_generation=AliasedTask("text_generation", []), - opt=AliasedTask("opt", []), - bloom=AliasedTask("bloom", []), - ) - - all_task_categories = [text_generation] - - @classmethod - def check_register_task( - cls, task: str, extra_tasks: Optional[Iterable[str]] = None - ): - """ - :param task: task name to validate and import dependencies for - :param extra_tasks: valid task names that are not included in supported tasks. - i.e. 
tasks registered to Pipeline at runtime - """ - if cls.is_text_generation(task): - import deepsparse.v2.text_generation.pipeline # noqa: F401 - - all_tasks = set(cls.task_names() + (list(extra_tasks or []))) - if task not in all_tasks: - raise ValueError( - f"Unknown Pipeline task {task}. Currently supported tasks are " - f"{list(all_tasks)}" - ) - - @classmethod - def is_text_generation(cls, task: str) -> bool: - """ - :param task: the name of the task to check whether it is a text generation task - such as codegen - :return: True if it is a text generation task, False otherwise - """ - return any( - text_generation_task.matches(task) - for text_generation_task in cls.text_generation - ) - - @classmethod - def task_names(cls): - task_names = ["custom"] - for task_category in cls.all_task_categories: - for task in task_category: - unique_aliases = ( - alias for alias in task._aliases if alias != task._name - ) - task_names += (task._name, *unique_aliases) - return task_names - - -def dynamic_import_task(module_or_path: str) -> str: - """ - Dynamically imports `module` with importlib, and returns the `TASK` - attribute on the module (something like `importlib.import_module(module).TASK`). - - Example contents of `module`: - ```python - from deepsparse.pipeline import Pipeline - from deepsparse.transformers.pipelines.question_answering import ( - QuestionAnsweringPipeline, - ) - - TASK = "my_qa_task" - Pipeline.register(TASK)(QuestionAnsweringPipeline) - ``` - - NOTE: this modifies `sys.path`. - - :raises FileNotFoundError: if path does not exist - :raises RuntimeError: if the imported module does not contain `TASK` - :raises RuntimeError: if the module doesn't register the task - :return: The task from the imported module. - """ - parent_dir, module_name = _split_dir_and_name(module_or_path) - if not os.path.exists(os.path.join(parent_dir, module_name + ".py")): - raise FileNotFoundError( - f"Unable to find file for {module_or_path}. " - f"Looked for {module_name}.py under {parent_dir if parent_dir else '.'}" - ) - - # add parent_dir to sys.path so we can import the file as a module - sys.path.append(os.curdir) - if parent_dir: - _LOGGER.info(f"Adding {parent_dir} to sys.path") - sys.path.append(parent_dir) - - # do the import - _LOGGER.info(f"Importing '{module_name}'") - module_or_path = importlib.import_module(module_name) - - if not hasattr(module_or_path, "TASK"): - raise RuntimeError( - "When using --task import:, " - "module must set the `TASK` attribute." - ) - - task = getattr(module_or_path, "TASK") - _LOGGER.info(f"Using task={repr(task)}") - - return task - - -def _split_dir_and_name(module_or_path: str) -> Tuple[str, str]: - """ - Examples: - - `a` -> `("", "a")` - - `a.b` -> `("a", "b")` - - `a.b.c` -> `("a/b", "c")` - - :return: module split into directory & name - """ - if module_or_path.endswith(".py"): - # assume path - split_char = os.sep - module_or_path = module_or_path.replace(".py", "") - else: - # assume module - split_char = "." 
- *dirs, module_name = module_or_path.split(split_char) - parent_dir = os.sep if dirs == [""] else os.sep.join(dirs) - return parent_dir, module_name diff --git a/src/deepsparse/yolact/annotate.py b/src/deepsparse/yolact/annotate.py index 18e7d8c952..7fdc837aa7 100644 --- a/src/deepsparse/yolact/annotate.py +++ b/src/deepsparse/yolact/annotate.py @@ -69,7 +69,7 @@ import click import cv2 -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils.annotate import ( annotate, get_annotations_save_dir, diff --git a/src/deepsparse/yolact/pipelines.py b/src/deepsparse/yolact/pipelines.py index a0e0968dce..99ab3cc876 100644 --- a/src/deepsparse/yolact/pipelines.py +++ b/src/deepsparse/yolact/pipelines.py @@ -18,7 +18,7 @@ import numpy import torch -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolact.schemas import YOLACTInputSchema, YOLACTOutputSchema from deepsparse.yolact.utils import ( diff --git a/src/deepsparse/yolo/pipelines.py b/src/deepsparse/yolo/pipelines.py index 935fc9a1d4..513c62c1fb 100644 --- a/src/deepsparse/yolo/pipelines.py +++ b/src/deepsparse/yolo/pipelines.py @@ -18,7 +18,7 @@ import numpy import onnx -from deepsparse.pipeline import Pipeline +from deepsparse.legacy.pipeline import Pipeline from deepsparse.utils import model_to_path from deepsparse.yolo.schemas import YOLOInput, YOLOOutput from deepsparse.yolo.utils import ( diff --git a/src/deepsparse/yolov8/pipelines.py b/src/deepsparse/yolov8/pipelines.py index 4264b5f902..f64fac2cdd 100644 --- a/src/deepsparse/yolov8/pipelines.py +++ b/src/deepsparse/yolov8/pipelines.py @@ -19,7 +19,7 @@ import numpy import torch -from deepsparse import Pipeline +from deepsparse.legacy import Pipeline from deepsparse.yolo import YOLOOutput as YOLODetOutput from deepsparse.yolo import YOLOPipeline from deepsparse.yolov8.schemas import YOLOSegOutput diff --git a/tests/deepsparse/evaluation/test_utils.py b/tests/deepsparse/evaluation/test_utils.py index 047799367d..648a521a3d 100644 --- a/tests/deepsparse/evaluation/test_utils.py +++ b/tests/deepsparse/evaluation/test_utils.py @@ -47,12 +47,12 @@ def torch_target(): def test_initialize_model_from_target_pipeline_onnx(pipeline_target): model = text_generation_model_from_target(pipeline_target, "onnxruntime") - assert model.engine_type == "onnxruntime" + assert model.ops.get("single_engine")._engine_type == "onnxruntime" def test_initialize_model_from_target_pipeline_deepsparse(pipeline_target): model = text_generation_model_from_target(pipeline_target, "deepsparse") - assert model.engine_type == "deepsparse" + assert model.ops.get("single_engine")._engine_type == "deepsparse" def test_initialize_model_from_target_torch(torch_target): diff --git a/tests/deepsparse/image_classification/test_pipelines.py b/tests/deepsparse/image_classification/legacy/test_pipelines.py similarity index 98% rename from tests/deepsparse/image_classification/test_pipelines.py rename to tests/deepsparse/image_classification/legacy/test_pipelines.py index 1d74831679..5edf500fd4 100644 --- a/tests/deepsparse/image_classification/test_pipelines.py +++ b/tests/deepsparse/image_classification/legacy/test_pipelines.py @@ -16,11 +16,11 @@ import numpy import pytest -from deepsparse import Pipeline from deepsparse.image_classification.constants import ( IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS, ) +from deepsparse.legacy import Pipeline from sparsezoo import Model from sparsezoo.utils import 
load_numpy_list from tests.utils import mock_engine diff --git a/tests/deepsparse/v2/test_image_classification.py b/tests/deepsparse/image_classification/test_image_classification.py similarity index 88% rename from tests/deepsparse/v2/test_image_classification.py rename to tests/deepsparse/image_classification/test_image_classification.py index c6b04e6f2f..5c0e0761df 100644 --- a/tests/deepsparse/v2/test_image_classification.py +++ b/tests/deepsparse/image_classification/test_image_classification.py @@ -15,10 +15,8 @@ import numpy import pytest -from deepsparse.v2.image_classification import ImageClassificationPipeline -from deepsparse.v2.image_classification.preprocess_operator import ( - ImageClassificationInput, -) +from deepsparse.image_classification import ImageClassificationPipeline +from deepsparse.image_classification.preprocess_operator import ImageClassificationInput from tests.deepsparse.pipelines.data_helpers import computer_vision diff --git a/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py b/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py index ec05003822..8f3017f248 100644 --- a/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py +++ b/tests/deepsparse/pipelines/dynamic_import_modules/valid_dynamic_import.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from deepsparse.pipeline import Pipeline +# TODO: update to test the new Pipeline +from deepsparse.legacy.pipeline import Pipeline TASK = "unit_test_task" diff --git a/tests/deepsparse/v2/test_basic_pipeline.py b/tests/deepsparse/pipelines/test_basic_pipeline.py similarity index 88% rename from tests/deepsparse/v2/test_basic_pipeline.py rename to tests/deepsparse/pipelines/test_basic_pipeline.py index bedddd537a..c98ffd538c 100644 --- a/tests/deepsparse/v2/test_basic_pipeline.py +++ b/tests/deepsparse/pipelines/test_basic_pipeline.py @@ -20,10 +20,10 @@ from pydantic import BaseModel -from deepsparse.v2 import Pipeline -from deepsparse.v2.operators import Operator -from deepsparse.v2.routers import LinearRouter -from deepsparse.v2.schedulers import OperatorScheduler +from deepsparse import Pipeline +from deepsparse.operators import Operator +from deepsparse.routers import LinearRouter +from deepsparse.schedulers import OperatorScheduler class IntSchema(BaseModel): diff --git a/tests/deepsparse/pipelines/test_bucketing.py b/tests/deepsparse/pipelines/test_bucketing.py index f3ef345245..f5963658d2 100644 --- a/tests/deepsparse/pipelines/test_bucketing.py +++ b/tests/deepsparse/pipelines/test_bucketing.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from deepsparse import BucketingPipeline, Pipeline +from deepsparse.legacy import BucketingPipeline, Pipeline from tests.utils import mock_engine diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py index b085686186..cb8bfeb97b 100644 --- a/tests/deepsparse/pipelines/test_clip.py +++ b/tests/deepsparse/pipelines/test_clip.py @@ -13,7 +13,6 @@ # limitations under the License. 
import pytest -from deepsparse import BasePipeline, Pipeline from deepsparse.clip import ( CLIPCaptionInput, CLIPCaptionPipeline, @@ -48,6 +47,8 @@ def text_input(): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_visual_clip(engine, visual_input): + from deepsparse import Pipeline + model_path = visual_input[-1] pipeline = Pipeline.create(task="clip_visual", model_path=model_path) assert isinstance(pipeline, CLIPVisualPipeline) @@ -59,6 +60,8 @@ def test_visual_clip(engine, visual_input): @pytest.mark.skip(reason="No CLIP models curently available to run tests") @mock_engine(rng_seed=0) def test_text_clip(engine, text_input): + from deepsparse import Pipeline + model_path = text_input[-1] pipeline = Pipeline.create(task="clip_text", model_path=model_path) assert isinstance(pipeline, CLIPTextPipeline) @@ -70,6 +73,8 @@ def test_text_clip(engine, text_input): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_zero_shot(engine, visual_input, text_input): + from deepsparse.legacy import BasePipeline + model_path_text = text_input[-1] model_path_visual = visual_input[-1] kwargs = { @@ -88,6 +93,8 @@ def test_zero_shot(engine, visual_input, text_input): @pytest.mark.skip(reason="No CLIP models currently available to run tests") @mock_engine(rng_seed=0) def test_caption(engine, visual_input, text_input): + from deepsparse.legacy import BasePipeline + model_path_visual = text_input[-1] model_path_text = text_input[-1] model_path_decoder = None diff --git a/tests/deepsparse/pipelines/test_custom_pipeline.py b/tests/deepsparse/pipelines/test_custom_pipeline.py index 061b59ae03..34876f99c0 100644 --- a/tests/deepsparse/pipelines/test_custom_pipeline.py +++ b/tests/deepsparse/pipelines/test_custom_pipeline.py @@ -52,6 +52,9 @@ def model_path(): ], ) def test_custom_pipeline_task_names(task_name): + # TODO: update test to be compatible with new pipeline + from deepsparse.legacy.pipeline import Pipeline + cls = Pipeline._get_task_constructor(task_name) assert cls == CustomTaskPipeline diff --git a/tests/deepsparse/pipelines/test_dynamic_import.py b/tests/deepsparse/pipelines/test_dynamic_import.py index 63096e2365..4d5a9333ac 100644 --- a/tests/deepsparse/pipelines/test_dynamic_import.py +++ b/tests/deepsparse/pipelines/test_dynamic_import.py @@ -15,8 +15,10 @@ import os import pytest -from deepsparse.pipeline import _REGISTERED_PIPELINES, Pipeline -from deepsparse.tasks import _split_dir_and_name, dynamic_import_task + +# TODO: update to test the new Pipeline +from deepsparse.legacy.pipeline import _REGISTERED_PIPELINES, Pipeline +from deepsparse.legacy.tasks import _split_dir_and_name, dynamic_import_task def test_split_dir_and_name_module(): diff --git a/tests/deepsparse/pipelines/test_pipeline.py b/tests/deepsparse/pipelines/test_pipeline.py index 945959c679..6ad1c71fe4 100644 --- a/tests/deepsparse/pipelines/test_pipeline.py +++ b/tests/deepsparse/pipelines/test_pipeline.py @@ -18,8 +18,10 @@ import flaky import pytest -from deepsparse.base_pipeline import BasePipeline -from deepsparse.pipeline import ( +from deepsparse.legacy.base_pipeline import BasePipeline + +# TODO: update to test the new pipeline +from deepsparse.legacy.pipeline import ( Pipeline, PipelineConfig, _initialize_executor_and_workers, diff --git a/tests/deepsparse/v2/__init__.py b/tests/deepsparse/schedulers/__init__.py similarity index 100% rename from tests/deepsparse/v2/__init__.py rename to 
tests/deepsparse/schedulers/__init__.py diff --git a/tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py b/tests/deepsparse/schedulers/test_continuous_batching_scheduler.py similarity index 94% rename from tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py rename to tests/deepsparse/schedulers/test_continuous_batching_scheduler.py index 85cac323e0..6d56d71eff 100644 --- a/tests/deepsparse/v2/schedulers/test_continuous_batching_scheduler.py +++ b/tests/deepsparse/schedulers/test_continuous_batching_scheduler.py @@ -16,8 +16,8 @@ import numpy -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers import ContinuousBatchingScheduler +from deepsparse.operators import EngineOperator +from deepsparse.schedulers import ContinuousBatchingScheduler def test_continuous_batching_executor_thread(): diff --git a/tests/deepsparse/v2/integration_tests/__init__.py b/tests/deepsparse/schedulers/utils/__init__.py similarity index 100% rename from tests/deepsparse/v2/integration_tests/__init__.py rename to tests/deepsparse/schedulers/utils/__init__.py diff --git a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py b/tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py similarity index 96% rename from tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py rename to tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py index 2b7c5a5e68..6389a321d4 100644 --- a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_executor.py +++ b/tests/deepsparse/schedulers/utils/test_continuous_batching_executor.py @@ -17,8 +17,8 @@ import numpy -from deepsparse.v2.operators import EngineOperator -from deepsparse.v2.schedulers.utils import ( +from deepsparse.operators import EngineOperator +from deepsparse.schedulers.utils import ( ContinuousBatchingExecutorThread, ContinuousBatchingQueues, ) diff --git a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py b/tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py similarity index 99% rename from tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py rename to tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py index 1713d54f82..2ef78ccbd2 100644 --- a/tests/deepsparse/v2/schedulers/utils/test_continuous_batching_queues.py +++ b/tests/deepsparse/schedulers/utils/test_continuous_batching_queues.py @@ -16,7 +16,7 @@ from threading import Thread import pytest -from deepsparse.v2.schedulers.utils import ( +from deepsparse.schedulers.utils import ( ContinuousBatchingQueue, ContinuousBatchingQueues, QueueEntry, diff --git a/tests/deepsparse/v2/schedulers/__init__.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/__init__.py similarity index 100% rename from tests/deepsparse/v2/schedulers/__init__.py rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/__init__.py diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/codegen.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/codegen.yaml similarity index 100% rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/codegen.yaml rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/codegen.yaml diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/gpt_neo.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/gpt_neo.yaml 
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/gpt_neo.yaml
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/gpt_neo.yaml
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/configs/opt.yaml b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/opt.yaml
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/configs/opt.yaml
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs/opt.yaml
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/helpers.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/helpers.py
similarity index 100%
rename from tests/deepsparse/transformers/pipelines/integration_tests/helpers.py
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/helpers.py
diff --git a/tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
similarity index 96%
rename from tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py
rename to tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
index 33dca47bfa..eb02b91ba9 100644
--- a/tests/deepsparse/transformers/pipelines/integration_tests/test_llms.py
+++ b/tests/deepsparse/transformers/pipelines/legacy/integration_tests/test_llms.py
@@ -42,10 +42,13 @@
 import numpy
 import pytest
-from deepsparse import Pipeline
-from deepsparse.transformers.pipelines.text_generation import TextGenerationOutput
+
+# NOTE: this tests the legacy text generation pipeline. integration tests exist
+# for the new pipeline under v2
+from deepsparse.legacy import Pipeline
+from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
 from sparsezoo import Model
-from tests.deepsparse.transformers.pipelines.integration_tests.helpers import (
+from tests.deepsparse.transformers.pipelines.legacy.integration_tests.helpers import (
     TorchGroundTruthSource,
     parse_params,
     validate_internal_kv_cache,
@@ -53,7 +56,9 @@
 )
-CONFIGS_DIRECTORY = "tests/deepsparse/transformers/pipelines/integration_tests/configs"
+CONFIGS_DIRECTORY = (
+    "tests/deepsparse/transformers/pipelines/legacy/integration_tests/configs"
+)
 @pytest.fixture()
diff --git a/tests/deepsparse/transformers/pipelines/test_text_generation.py b/tests/deepsparse/transformers/pipelines/test_text_generation.py
index ba2a52c40e..4b783ad53d 100644
--- a/tests/deepsparse/transformers/pipelines/test_text_generation.py
+++ b/tests/deepsparse/transformers/pipelines/test_text_generation.py
@@ -17,7 +17,9 @@
 import numpy
 import pytest
-from deepsparse import Pipeline
+
+# TODO: update to use/be compliant with new pipeline
+from deepsparse.legacy.pipeline import Pipeline
 from deepsparse.transformers.utils.helpers import prepends_bos_token
diff --git a/tests/deepsparse/v2/schedulers/utils/__init__.py b/tests/deepsparse/transformers/text_generation/__init__.py
similarity index 100%
rename from tests/deepsparse/v2/schedulers/utils/__init__.py
rename to tests/deepsparse/transformers/text_generation/__init__.py
diff --git a/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py b/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py
new file mode 100644
index 0000000000..0c44f887a4
--- /dev/null
+++ b/tests/deepsparse/transformers/text_generation/integration_tests/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/deepsparse/v2/integration_tests/configs/codegen.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/codegen.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/codegen.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/codegen.yaml
diff --git a/tests/deepsparse/v2/integration_tests/configs/gpt_neo.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/gpt_neo.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/gpt_neo.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/gpt_neo.yaml
diff --git a/tests/deepsparse/v2/integration_tests/configs/opt.yaml b/tests/deepsparse/transformers/text_generation/integration_tests/configs/opt.yaml
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/configs/opt.yaml
rename to tests/deepsparse/transformers/text_generation/integration_tests/configs/opt.yaml
diff --git a/tests/deepsparse/v2/integration_tests/helpers.py b/tests/deepsparse/transformers/text_generation/integration_tests/helpers.py
similarity index 100%
rename from tests/deepsparse/v2/integration_tests/helpers.py
rename to tests/deepsparse/transformers/text_generation/integration_tests/helpers.py
diff --git a/tests/deepsparse/v2/integration_tests/test_llms.py b/tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
similarity index 96%
rename from tests/deepsparse/v2/integration_tests/test_llms.py
rename to tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
index c53899f30c..45ba1135b7 100644
--- a/tests/deepsparse/v2/integration_tests/test_llms.py
+++ b/tests/deepsparse/transformers/text_generation/integration_tests/test_llms.py
@@ -39,18 +39,20 @@
 import numpy
 import pytest
-from deepsparse.transformers.pipelines.text_generation import TextGenerationOutput
-from deepsparse.v2.pipeline import Pipeline
-from deepsparse.v2.text_generation import TextGenerationPipeline
+from deepsparse import Pipeline
+from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline
+from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
 from sparsezoo import Model
-from tests.deepsparse.transformers.pipelines.integration_tests.helpers import (
+from tests.deepsparse.transformers.pipelines.legacy.integration_tests.helpers import (
     TorchGroundTruthSource,
     parse_params,
     validate_internal_kv_cache,
 )
-CONFIGS_DIRECTORY = "tests/deepsparse/v2/integration_tests/configs"
+CONFIGS_DIRECTORY = (
+    "tests/deepsparse/transformers/text_generation/integration_tests/configs"
+)
 @pytest.fixture()
@@ -135,7 +137,7 @@ def test_ort_single_token_prefill(self, setup):
         pipeline = self.get_pipeline(
             prompt_sequence_length=1,
-            engine_kwargs={"engine_type": "onnxruntime"},
+            engine_type="onnxruntime",
         )
         output = pipeline(
             prompt=self.prompt,
@@ -163,7 +165,7 @@ def test_ort_multi_token_prefill(self, setup):
             "Cannot run ORT pipeline with the internal deepsparse cache enabled."
         )
         pipeline = self.get_pipeline(
-            engine_kwargs={"engine_type": "onnxruntime"},
+            engine_type="onnxruntime",
         )
         output = pipeline(
             prompt=self.prompt,
@@ -244,7 +246,7 @@ def test_inference_no_kv_cache_ort(self, setup):
     def _test_inference_no_kv_cache(self, engine_type):
         model_path_no_cache = self._get_model_path_no_cache()
         pipeline = self.get_pipeline(
-            model_path=model_path_no_cache, engine_kwargs={"engine_type": engine_type}
+            model_path=model_path_no_cache, engine_type=engine_type
         )
         assert not pipeline.cache_support_enabled, (
             "This pipeline test inference using non-kv cache "
diff --git a/tests/deepsparse/v2/unit/text_generation/conftest.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
similarity index 96%
rename from tests/deepsparse/v2/unit/text_generation/conftest.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
index 3840a9bb0a..7f0251a4d7 100644
--- a/tests/deepsparse/v2/unit/text_generation/conftest.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/conftest.py
@@ -20,13 +20,16 @@
 import pytest
 from deepsparse.transformers.helpers import get_deployment_path
 from deepsparse.transformers.pipelines.text_generation import (
+    NLEngineOperator,
+    TokenGeneratorOperator,
+)
+from deepsparse.transformers.schemas.text_generation_schemas import (
     GenerationDefaults,
     TextGenerationInput,
 )
 from deepsparse.transformers.utils import DecoderKVCache
 from deepsparse.transformers.utils.helpers import initialize_kv_cache_state
-from deepsparse.v2 import InferenceState, PipelineState
-from deepsparse.v2.text_generation import NLEngineOperator, TokenGeneratorOperator
+from deepsparse.utils import InferenceState, PipelineState
 @pytest.fixture(scope="module")
diff --git a/tests/deepsparse/v2/unit/text_generation/test_kv_cache.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
similarity index 93%
rename from tests/deepsparse/v2/unit/text_generation/test_kv_cache.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
index 0c6e42503a..c855dc2521 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_kv_cache.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_kv_cache.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import KVCacheCreator, KVCacheCreatorInput
+from deepsparse.transformers.pipelines.text_generation import (
+    KVCacheCreator,
+    KVCacheCreatorInput,
+)
 def test_kv_cache_creation(
diff --git a/tests/deepsparse/v2/unit/text_generation/test_misc.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
similarity index 89%
rename from tests/deepsparse/v2/unit/text_generation/test_misc.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
index f215e2aedb..4db36de7ad 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_misc.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_misc.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import CompilePromptLogits
-from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs
+from deepsparse.transformers.pipelines.text_generation import CompilePromptLogits
+from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import (
+    NLEngineOutputs,
+)
 def test_compile_logits(mock_logits, mock_inference_state, mock_tokens, mock_kv_cache):
diff --git a/tests/deepsparse/v2/unit/text_generation/test_process_inputs.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
similarity index 90%
rename from tests/deepsparse/v2/unit/text_generation/test_process_inputs.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
index 02f4540c44..4362d7f7d8 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_process_inputs.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_process_inputs.py
@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.transformers.pipelines.text_generation import GenerationDefaults
-from deepsparse.v2.text_generation import ProcessInputsTextGeneration
+from deepsparse.legacy.transformers.pipelines.text_generation import GenerationDefaults
+from deepsparse.transformers.pipelines.text_generation import (
+    ProcessInputsTextGeneration,
+)
 def test_process_inputs(
diff --git a/tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
similarity index 98%
rename from tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
index 19bb4d1c4a..b902417efc 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_single_token_engine.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_single_token_engine.py
@@ -14,7 +14,7 @@
 import numpy
-from deepsparse.v2.text_generation import (
+from deepsparse.transformers.pipelines.text_generation import (
     AutoRegressiveOperatorPreprocess,
     NLEngineInputs,
 )
diff --git a/tests/deepsparse/v2/unit/text_generation/test_token_generation.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
similarity index 95%
rename from tests/deepsparse/v2/unit/text_generation/test_token_generation.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
index d04f863171..613f1106b3 100644
--- a/tests/deepsparse/v2/unit/text_generation/test_token_generation.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/test_token_generation.py
@@ -13,12 +13,14 @@
 # limitations under the License.
 import numpy
-from deepsparse.v2.text_generation import (
+from deepsparse.transformers.pipelines.text_generation import (
     GenerateNewTokenOperator,
     PrepareGeneration,
     TokenGeneratorOperator,
 )
-from deepsparse.v2.text_generation.nl_engine_operator import NLEngineOutputs
+from deepsparse.transformers.pipelines.text_generation.nl_engine_operator import (
+    NLEngineOutputs,
+)
 def test_prep_for_generation(
diff --git a/tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py b/tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
similarity index 96%
rename from tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py
rename to tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
index d2c822af4c..42dd1b1c97 100644
--- a/tests/deepsparse/v2/unit/text_generation/text_multi_token_engine.py
+++ b/tests/deepsparse/transformers/text_generation/unit/text_generation/text_multi_token_engine.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from deepsparse.v2.text_generation import MultiEnginePrefill
+from deepsparse.transformers.pipelines.text_generation import MultiEnginePrefill
 def test_mult_engine_preprocess(
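The test moves above all follow the same namespace migration. A minimal sketch of the resulting import surface, assuming the post-refactor package layout; every path is taken from the + lines in the hunks above, so treat it as a summary rather than a definitive API reference:

# operators and schedulers move from deepsparse.v2.* to the top-level package
from deepsparse.operators import EngineOperator
from deepsparse.schedulers import ContinuousBatchingScheduler

# the old top-level pipeline becomes the legacy pathway
from deepsparse.legacy import Pipeline as LegacyPipeline

# the v2 pipeline and text generation operators become the defaults
from deepsparse import Pipeline
from deepsparse.transformers.pipelines.text_generation import TextGenerationPipeline
from deepsparse.transformers.schemas.text_generation_schemas import TextGenerationOutput
from deepsparse.utils import InferenceState, PipelineState

# note: in the updated tests, the engine is selected with engine_type="onnxruntime"
# directly, replacing the old engine_kwargs={"engine_type": "onnxruntime"} form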