forked from microsoft/promptflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Replace dag with flex in evals (microsoft#3521)
Remove deprecated DAG flags from built-in evaluators in the evals package, replacing them with flex flows. Functionally, this amounts to directly coding the behavior specified in the former flow.dag.yaml files. This ended up editing 2 evaluators: F1 score and content safety. In the case of content safety, I used this change to refactor the code slightly into a base class and children, which mostly still exist just to have their own docstrings.
- Loading branch information
1 parent
87edf57
commit 7362dc1
Showing
39 changed files
with
2,695 additions
and
556 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 13 additions & 4 deletions
17
src/promptflow-evals/promptflow/evals/evaluators/_content_safety/_content_safety.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 14 additions & 4 deletions
18
src/promptflow-evals/promptflow/evals/evaluators/_content_safety/_content_safety_chat.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
...w-evals/promptflow/evals/evaluators/_content_safety/_content_safety_sub_evaluator_base.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# --------------------------------------------------------- | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# --------------------------------------------------------- | ||
from abc import ABC | ||
|
||
try: | ||
from .common.constants import EvaluationMetrics | ||
from .common.evaluate_with_rai_service import evaluate_with_rai_service | ||
from .common.validate_inputs import validate_inputs | ||
except ImportError: | ||
from common.constants import EvaluationMetrics | ||
from common.evaluate_with_rai_service import evaluate_with_rai_service | ||
from common.validate_inputs import validate_inputs | ||
|
||
|
||
class ContentSafetySubEvaluatorBase(ABC):
    """
    Initialize an evaluator for a specified Evaluation Metric. Base class that is not
    meant to be instantiated by users.

    :param metric: The metric to be evaluated.
    :type metric: ~promptflow.evals.evaluators._content_safety.common.constants.EvaluationMetrics
    :param project_scope: The scope of the Azure AI project.
        It contains subscription id, resource group, and project name.
    :type project_scope: dict
    :param credential: The credential for connecting to Azure AI project.
    :type credential: TokenCredential
    """

    def __init__(self, metric: EvaluationMetrics, project_scope: dict, credential=None):
        # Stored as-is; they are only read back when the evaluator is called.
        self._metric = metric
        self._project_scope = project_scope
        self._credential = credential

    def __call__(self, *, question: str, answer: str, **kwargs):
        """
        Evaluates content according to this evaluator's metric.

        :param question: The question to be evaluated.
        :type question: str
        :param answer: The answer to be evaluated.
        :type answer: str
        :keyword kwargs: Additional keyword arguments. They are accepted so callers may
            pass extra fields, but they are not used by this method.
        :return: The evaluation score.
        :rtype: dict
        """
        # Validate inputs.
        # Raises value error if failed, so execution alone signifies success;
        # the return value is intentionally not used.
        validate_inputs(question=question, answer=answer)

        # Run score computation based on the metric supplied at construction time.
        return evaluate_with_rai_service(
            metric_name=self._metric,
            question=question,
            answer=answer,
            project_scope=self._project_scope,
            credential=self._credential,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
12 changes: 12 additions & 0 deletions
12
src/promptflow-evals/promptflow/evals/evaluators/_content_safety/common/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# --------------------------------------------------------- | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# --------------------------------------------------------- | ||
|
||
from . import constants, evaluate_with_rai_service, validate_inputs, utils | ||
|
||
# Submodules re-exported by this package.
__all__ = ["constants", "evaluate_with_rai_service", "validate_inputs", "utils"]
3 changes: 3 additions & 0 deletions
3
...luators/_content_safety/flow/constants.py → ...ators/_content_safety/common/constants.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.