Skip to content

Commit

Permalink
Support for custom evaluators
Browse files Browse the repository at this point in the history
* Move evaluation registration out of OliveEvaluator into its own class
* Evaluators are registered using both the Framework and the class name
* Add "type", "type_args", "user_script" and "script_dir" to
  OliveEvaluatorConfig to support user-specific custom implementations
* Remove some dead code
  • Loading branch information
shaahji committed Aug 8, 2024
1 parent 2c8abec commit cc73974
Showing 30 changed files with 397 additions and 258 deletions.
3 changes: 1 addition & 2 deletions olive/engine/engine.py
Original file line number Diff line number Diff line change
@@ -1079,10 +1079,9 @@ def _evaluate_model(
return signal

# evaluate model
metrics = evaluator_config.metrics if evaluator_config else []
if self.target.system_type != SystemType.AzureML:
model_config = self.cache.prepare_resources_for_local(model_config)
signal = self.target.evaluate_model(model_config, metrics, accelerator_spec)
signal = self.target.evaluate_model(model_config, evaluator_config, accelerator_spec)

# cache evaluation
self._cache_evaluation(model_id_with_accelerator, signal)
13 changes: 13 additions & 0 deletions olive/evaluator/__init__.py
Original file line number Diff line number Diff line change
@@ -2,3 +2,16 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

from olive.evaluator.metric import Metric, SubMetric
from olive.evaluator.metric_result import MetricResult, SubMetricResult, flatten_metric_result
from olive.evaluator.olive_evaluator import OliveEvaluator

# Public names of the olive.evaluator package: the names bound by
# `from olive.evaluator import *`.
__all__ = [
"flatten_metric_result",
"Metric",
"MetricResult",
"OliveEvaluator",
"SubMetric",
"SubMetricResult",
]
333 changes: 182 additions & 151 deletions olive/evaluator/olive_evaluator.py

Large diffs are not rendered by default.

68 changes: 68 additions & 0 deletions olive/evaluator/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import inspect
import logging
from typing import ClassVar, Dict

logger = logging.getLogger(__name__)


class Registry:
    """Registry for olive model evaluators.

    Registrations are kept in one class-level mapping from name to evaluator class,
    so everything registered through ``Registry.register`` is visible to every
    ``Registry.get`` call.
    """

    _REGISTRY: ClassVar[Dict] = {}

    @staticmethod
    def _source_location(component):
        """Return ``(file, first_line_no)`` of component, or None if it cannot be determined.

        ``inspect`` raises TypeError/OSError for objects without retrievable source
        (built-ins, classes created with ``type()``/``exec``, interactive sessions).
        Looking up the location is purely diagnostic, so it must never abort a
        registration.
        """
        try:
            return inspect.getfile(component), inspect.getsourcelines(component)[1]
        except (OSError, TypeError):
            return None

    @classmethod
    def register(cls, name: str = None):
        """Register an evaluator to the registry.

        Registering a name that is already taken replaces the previous entry. A
        critical message is logged unless the new component is the very same
        definition as the previous one (same object, or same source file and line).

        Args:
            name (str): the name of the evaluator, if name is None, uses the class name

        Returns:
            Callable: the decorator function

        """

        def decorator(component):
            component_name = name if name is not None else component.__name__
            if component_name in cls._REGISTRY:
                previous = cls._REGISTRY[component_name]
                previous_location = cls._source_location(previous)
                current_location = cls._source_location(component)

                # An unknown location can only be trusted to match when the two
                # components are literally the same object.
                same_definition = previous is component or (
                    previous_location is not None and previous_location == current_location
                )
                if not same_definition:
                    previous_file, previous_line_no = previous_location or ("<unknown>", 0)
                    current_file, current_line_no = current_location or ("<unknown>", 0)
                    logger.critical(
                        "%s: Duplicate evaluator registration.\n"
                        "\tPrevious Registration: %s:%d\n"
                        "\tCurrent Registration: %s:%d.",
                        component_name,
                        previous_file,
                        previous_line_no,
                        current_file,
                        current_line_no,
                    )
            # The latest registration always wins, duplicate or not.
            cls._REGISTRY[component_name] = component
            return component

        return decorator

    @classmethod
    def get(cls, name: str):
        """Get an evaluator, by name, from the registry.

        Args:
            name (str): the name of the evaluator

        Returns:
            Type: the class registered under name, or None if nothing is registered

        """
        return cls._REGISTRY.get(name)
7 changes: 4 additions & 3 deletions olive/passes/onnx/inc_quantization.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@
from olive.data.config import DataConfig
from olive.evaluator.metric import Metric
from olive.evaluator.metric_result import joint_metric_key
from olive.evaluator.olive_evaluator import OliveEvaluatorFactory
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.exception import OlivePassError
from olive.hardware.accelerator import AcceleratorSpec
from olive.model import ONNXModelHandler
@@ -337,12 +337,13 @@ def eval_func(model):
)

# create evaluator for model
evaluator = OliveEvaluatorFactory.create_evaluator_for_model(olive_model)
evaluator_config = OliveEvaluatorConfig(metrics=[accuracy_metric])
evaluator = evaluator_config.create_evaluator(olive_model)

# evaluate model
result = evaluator.evaluate(
olive_model,
[accuracy_metric],
evaluator_config.metrics,
self.accelerator_spec.accelerator_type,
[self.accelerator_spec.execution_provider],
)
8 changes: 4 additions & 4 deletions olive/passes/onnx/perf_tuning.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
from olive.data.config import DataConfig
from olive.evaluator.metric import LatencySubType, Metric, MetricType
from olive.evaluator.metric_result import joint_metric_key
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.exception import EXCEPTIONS_TO_RAISE
from olive.hardware.accelerator import AcceleratorLookup, AcceleratorSpec
from olive.model import ONNXModelHandler
@@ -372,8 +373,6 @@ def get_benchmark(
):
import onnxruntime as ort

from olive.evaluator.olive_evaluator import OliveEvaluatorFactory

# prepare the inference_settings for metrics.
tuning_result_file = None
if test_params:
@@ -405,8 +404,9 @@ def get_benchmark(
joint_key = joint_metric_key(latency_metric.name, latency_metric.sub_types[0].name)

start_time = time.perf_counter()
evaluator = OliveEvaluatorFactory.create_evaluator_for_model(model)
metric_result = evaluator.evaluate(model, [latency_metric], self.config.device, None)
evaluator_config = OliveEvaluatorConfig(metrics=[latency_metric])
evaluator = evaluator_config.create_evaluator(model)
metric_result = evaluator.evaluate(model, evaluator_config.metrics, self.config.device, None)

end_time = time.perf_counter()
latency_ms = metric_result[joint_key].value
8 changes: 7 additions & 1 deletion olive/passes/pytorch/lora.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@
from olive.model.config.hf_config import HfLoadKwargs
from olive.passes import Pass
from olive.passes.olive_pass import PassConfigParam
from olive.strategy.search_parameter import Categorical

if TYPE_CHECKING:
from peft import PeftModel
@@ -156,7 +157,12 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassCon
" True. 16+ is required when using bfloat16 and model has operators such as Where."
),
),
"lora_r": PassConfigParam(type_=int, default_value=64, description="Lora R dimension."),
"lora_r": PassConfigParam(
type_=int,
default_value=64,
searchable_values=Categorical([16, 32, 64]),
description="Lora R dimension.",
),
"lora_alpha": PassConfigParam(
type_=float, default_value=16, description="The alpha parameter for Lora scaling."
),
10 changes: 5 additions & 5 deletions olive/systems/azureml/aml_evaluation_runner.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
from typing import TYPE_CHECKING

from olive.common.hf.login import aml_runner_hf_login
from olive.evaluator.metric import Metric
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.hardware import AcceleratorSpec
from olive.logging import set_verbosity_from_env
from olive.model import ModelConfig
@@ -24,11 +24,11 @@ def main(raw_args=None):
aml_runner_hf_login()

pipeline_output, resources, model_config, extra_args = get_common_args(raw_args)
metric_config, extra_args = parse_config(extra_args, "metric", resources)
evaluator_config, extra_args = parse_config(extra_args, "evaluator", resources)
accelerator_config, extra_args = parse_config(extra_args, "accelerator", resources)

# load metric
metric = Metric.from_json(metric_config)
# load evaluator config
evaluator_config = OliveEvaluatorConfig.from_json(evaluator_config)

# load model config
model_config = ModelConfig.from_json(model_config)
@@ -39,7 +39,7 @@ def main(raw_args=None):
target: OliveSystem = LocalSystem()

# metric result
metric_result = target.evaluate_model(model_config, [metric], accelerator_spec)
metric_result = target.evaluate_model(model_config, evaluator_config, accelerator_spec)

# save metric result json
with (Path(pipeline_output) / "metric_result.json").open("w") as f:
21 changes: 11 additions & 10 deletions olive/systems/azureml/aml_system.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@
from olive.common.constants import HF_LOGIN, KEYVAULT_NAME, WORKFLOW_ARTIFACTS, WORKFLOW_CONFIG
from olive.common.utils import copy_dir, get_nested_dict_value, retry_func, set_nested_dict_value
from olive.evaluator.metric_result import MetricResult
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.model import ModelConfig
from olive.resource_path import (
AZUREML_RESOURCE_TYPES,
@@ -38,7 +39,6 @@
from olive.workflows.run.config import RunConfig

if TYPE_CHECKING:
from olive.evaluator.metric import Metric
from olive.hardware.accelerator import AcceleratorSpec
from olive.passes.olive_pass import Pass

@@ -573,7 +573,7 @@ def _load_model(self, input_model_config: dict, output_model_path, pipeline_outp
return ModelConfig(**model_json)

def evaluate_model(
self, model_config: ModelConfig, metrics: List["Metric"], accelerator: "AcceleratorSpec"
self, model_config: ModelConfig, evaluator_config: OliveEvaluatorConfig, accelerator: "AcceleratorSpec"
) -> MetricResult:
if model_config.type.lower() == "SNPEModel".lower():
raise NotImplementedError("SNPE model does not support azureml evaluation")
@@ -583,14 +583,14 @@ def evaluate_model(
with tempfile.TemporaryDirectory() as tempdir:
ml_client = self.azureml_client_config.create_client()
pipeline_job = self._create_pipeline_for_evaluation(
tempdir, model_config.to_json(check_object=True), metrics, accelerator
tempdir, model_config.to_json(check_object=True), evaluator_config, accelerator
)

# submit job
named_outputs_dir = self._run_job(ml_client, pipeline_job, "olive-evaluation", tempdir)

metric_results = {}
for metric in metrics:
for metric in evaluator_config.metrics:
metric_json = named_outputs_dir / metric.name / "metric_result.json"
if metric_json.is_file():
with metric_json.open() as f:
@@ -602,20 +602,20 @@ def _create_pipeline_for_evaluation(
self,
tmp_dir: str,
model_config: dict,
metrics: List["Metric"],
evaluator_config: OliveEvaluatorConfig,
accelerator: "AcceleratorSpec",
):
tmp_dir = Path(tmp_dir)

@pipeline
def evaluate_pipeline():
outputs = {}
for metric in metrics:
for metric in evaluator_config.metrics:
metric_tmp_dir = tmp_dir / metric.name
metric_component = self._create_metric_component(
metric_tmp_dir,
model_config,
metric,
OliveEvaluatorConfig(type=evaluator_config.type, metrics=[metric]).to_json(check_object=True),
accelerator.to_json(),
)
outputs[metric.name] = metric_component.outputs.pipeline_output
@@ -630,10 +630,10 @@ def _create_metric_component(
self,
tmp_dir: Path,
model_config: dict,
metric: "Metric",
evaluator_config: dict,
accelerator_config: dict,
):
metric_json = metric.to_json(check_object=True)
assert len(evaluator_config["metrics"]) == 1, "Cannot handle more than one metric per component"

# prepare code
script_name = "aml_evaluation_runner.py"
@@ -651,7 +651,7 @@ def _create_metric_component(

# prepare inputs
inputs, args = self.create_inputs_and_args(
{"model": model_config, "metric": metric_json, "accelerator": accelerator_config},
{"model": model_config, "evaluator": evaluator_config, "accelerator": accelerator_config},
tmp_dir,
ignore_keys=["model_attributes"],
)
@@ -660,6 +660,7 @@ def _create_metric_component(
outputs = {"pipeline_output": Output(type=AssetTypes.URI_FOLDER)}

# metric type
metric_json = evaluator_config["metrics"][0]
metric_type = metric_json["type"]
if metric_json["sub_types"] is not None:
sub_type_name = ",".join([st["name"] for st in metric_json["sub_types"]])
11 changes: 7 additions & 4 deletions olive/systems/docker/docker_system.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@

if TYPE_CHECKING:
from olive.evaluator.metric import Metric
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.hardware.accelerator import AcceleratorSpec
from olive.passes import Pass

@@ -214,11 +215,13 @@ def _run_pass_container(
return None

def evaluate_model(
self, model_config: "ModelConfig", metrics: List["Metric"], accelerator: "AcceleratorSpec"
self, model_config: "ModelConfig", evaluator_config: "OliveEvaluatorConfig", accelerator: "AcceleratorSpec"
) -> Dict[str, Any]:
container_root_path = Path("/olive-ws/")
with tempfile.TemporaryDirectory() as tempdir:
metric_json = self._run_eval_container(tempdir, model_config, metrics, accelerator, container_root_path)
metric_json = self._run_eval_container(
tempdir, model_config, evaluator_config, accelerator, container_root_path
)
if metric_json.is_file():
with metric_json.open() as f:
metrics_res = json.load(f)
@@ -231,7 +234,7 @@ def _run_eval_container(
self,
workdir,
model_config: "ModelConfig",
metrics: List["Metric"],
evaluator_config: "OliveEvaluatorConfig",
accelerator: "AcceleratorSpec",
container_root_path: Path,
):
@@ -254,7 +257,7 @@ def _run_eval_container(
)
volumes_list += model_mount_str_list

metrics_copy = copy.deepcopy(metrics)
metrics_copy = copy.deepcopy(evaluator_config.metrics)
# mount metrics related external files
volumes_list.extend(
# the metrics_copy is modified when creating the volumes list
4 changes: 2 additions & 2 deletions olive/systems/docker/eval.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
import sys

from olive.common.hf.login import huggingface_login
from olive.evaluator.olive_evaluator import OliveEvaluator, OliveEvaluatorConfig, OliveEvaluatorFactory
from olive.evaluator.olive_evaluator import OliveEvaluator, OliveEvaluatorConfig
from olive.logging import set_verbosity_from_env
from olive.model import ModelConfig

@@ -29,7 +29,7 @@ def evaluate_entry(config, output_path, output_name, accelerator_type, execution

model = ModelConfig.from_json(model_json).create_model()

evaluator: OliveEvaluator = OliveEvaluatorFactory.create_evaluator_for_model(model)
evaluator: OliveEvaluator = evaluator_config.create_evaluator(model)
metrics_res = evaluator.evaluate(
model, evaluator_config.metrics, device=accelerator_type, execution_providers=execution_provider
)
Loading

0 comments on commit cc73974

Please sign in to comment.