Commit: lint
tcapelle committed Oct 16, 2024
1 parent 633be15 commit 3ca802d
Showing 4 changed files with 19 additions and 23 deletions.
11 changes: 5 additions & 6 deletions weave/flow/scorers/hallucination_scorer.py
@@ -1,4 +1,3 @@
- import json
from typing import List

from pydantic import BaseModel, Field
@@ -111,18 +110,18 @@ class HallucinationFreeScorer(InstructorLLMScorer):
based on the input data.
Note:
- - The meaning of "hallucination" can vary from person to person, you will likely want to
+ - The meaning of "hallucination" can vary from person to person, you will likely want to
customize the `system_prompt` and `user_prompt` to fit your specific needs.
- - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
+ - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
provider's response; you will have to install the `instructor` python package to use it.
- The `score` method expects the input column from the dataset to be named "context". It will use
- this data as the ground-truth to check hallucinations against. If your dataset column has a
- different name, you can specify a different mapping using the `column_map` argument in the init
+ this data as the ground-truth to check hallucinations against. If your dataset column has a
+ different name, you can specify a different mapping using the `column_map` argument in the init
of HallucinationFreeScorer by passing `column_map={"context": "context"}`.
Attributes:
system_prompt (str): The prompt describing the task, defines what a "hallucination" is.
- user_prompt (str): The string template to pass the input and output data. The template must
+ user_prompt (str): The string template to pass the input and output data. The template must
contain placeholders for both `{input_data}` and `{output}`.
model_id (str): The LLM model name; depends on the LLM provider of the `client` being used.
temperature (float): LLM temperature setting.
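For orientation, a minimal usage sketch of the column_map behaviour described in the docstring above; the OpenAI client, model name, dataset column name, and the exact constructor and score signatures are assumptions for illustration rather than anything this commit establishes.

from openai import OpenAI

from weave.flow.scorers.hallucination_scorer import HallucinationFreeScorer

# Hypothetical dataset that stores its ground truth in a column named
# "ground_truth" instead of the default "context".
scorer = HallucinationFreeScorer(
    client=OpenAI(),  # assumed: any LLM client supported by the scorer
    model_id="gpt-4o",  # assumed model name
    column_map={"context": "ground_truth"},  # scorer argument -> dataset column
)

# Called directly (outside an Evaluation); the keyword names follow the
# docstring and are assumptions about the signature.
result = scorer.score(
    context="Paris is the capital of France.",
    output="The capital of France is Paris.",
)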
11 changes: 6 additions & 5 deletions weave/flow/scorers/llm_utils.py
@@ -19,11 +19,13 @@
if TYPE_CHECKING:
import instructor
from anthropic import Anthropic, AsyncAnthropic
+ from google.generativeai import GenerativeModel
from mistralai import Mistral
from openai import AsyncOpenAI, OpenAI
- from google.generativeai import GenerativeModel

- _LLM_CLIENTS = Union[OpenAI, AsyncOpenAI, Anthropic, AsyncAnthropic, Mistral, GenerativeModel]
+ _LLM_CLIENTS = Union[
+     OpenAI, AsyncOpenAI, Anthropic, AsyncAnthropic, Mistral, GenerativeModel
+ ]
else:
_LLM_CLIENTS = object

@@ -62,15 +64,14 @@ def instructor_client(client: _LLM_CLIENTS) -> "instructor.client":  # type: ignore
raise ValueError(f"Unsupported client type: {client_type}")


- import json
def create(client: instructor.client, *args, **kwargs) -> Any: # type: ignore
# gemini has slightly different argument namings...
# max_tokens -> max_output_tokens
if "generativemodel" in type(client.client).__name__.lower():
max_output_tokens = kwargs.pop("max_tokens")
temperature = kwargs.pop("temperature", None)
- _ = kwargs.pop("model") # model is baked in the client
- kwargs['generation_config'] = dict(
+ _ = kwargs.pop("model")  # model is baked in the client
+ kwargs["generation_config"] = dict(
max_output_tokens=max_output_tokens,
temperature=temperature,
)
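To make the Gemini branch of create() above concrete, here is a standalone re-statement of the kwargs rewrite it performs; _gemini_kwargs is a hypothetical helper name used only for this sketch, not part of the file.

from typing import Any


def _gemini_kwargs(**kwargs: Any) -> dict:
    # Re-states the rewrite shown above: Gemini expects max_output_tokens
    # inside generation_config, and the model is baked into the client.
    max_output_tokens = kwargs.pop("max_tokens")
    temperature = kwargs.pop("temperature", None)
    kwargs.pop("model")
    kwargs["generation_config"] = dict(
        max_output_tokens=max_output_tokens,
        temperature=temperature,
    )
    return kwargs


print(_gemini_kwargs(model="gemini-1.5-flash", max_tokens=256, temperature=0.0))
# {'generation_config': {'max_output_tokens': 256, 'temperature': 0.0}}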
3 changes: 1 addition & 2 deletions weave/flow/scorers/string_scorer.py
@@ -1,5 +1,4 @@
import re
- from typing import Callable, Union
+ from typing import Callable

from pydantic import Field, model_validator

17 changes: 7 additions & 10 deletions weave/flow/scorers/summarization_scorer.py
@@ -5,7 +5,7 @@

import weave
from weave.flow.scorers.llm_scorer import InstructorLLMScorer
- from weave.flow.scorers.llm_utils import create, OPENAI_DEFAULT_MODEL
+ from weave.flow.scorers.llm_utils import OPENAI_DEFAULT_MODEL, create

DEFAULT_EXTRACTION_SYSTEM_PROMPT = """
Given a <text>, extract all the unique entities from the text without repetition.
@@ -89,20 +89,20 @@ class SummarizationScorer(InstructorLLMScorer):
should look like.
Note:
- - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
+ - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
provider's response; you will have to install the `instructor` python package to use it.
- The `score` method expects the input column from the dataset to be named "input". If your dataset
- column has a different name, you can specify a different mapping using the `column_map` argument in the
+ column has a different name, you can specify a different mapping using the `column_map` argument in the
init of SummarizationScorer by passing `column_map={"input": "news_article"}`.
Attributes:
- extraction_system_prompt (str): System prompt to extract the distinct entities in the input. Customising
+ extraction_system_prompt (str): System prompt to extract the distinct entities in the input. Customising
this can help ensure that the LLM identifies the `entities` that you care about.
extraction_prompt (str): Prompt template for entity extraction; must contain a `{text}` placeholder.
summarization_evaluation_system_prompt (str): System prompt defining how to evaluate the quality of a summary.
Asks an LLM to grade the summary from `poor`, `ok`, to `excellent` and provide a rationale for the grade.
- summarization_evaluation_prompt (str): Prompt template for summarization evaluation instruction; must contain
- `{input}` and `{summary}` placeholders.
+ summarization_evaluation_prompt (str): Prompt template for summarization evaluation instruction; must contain
+ `{input}` and `{summary}` placeholders.
entity_density_threshold (float): Threshold for determining if a summary is sufficiently entity-dense.
model_id (str): The LLM model name; depends on the LLM provider of the `client` being used.
temperature (float): LLM temperature setting.
@@ -119,7 +119,6 @@ class SummarizationScorer(InstructorLLMScorer):
Calculates summarization score and entity density score for the given input and output.
"""


extraction_system_prompt: str = DEFAULT_EXTRACTION_SYSTEM_PROMPT
extraction_prompt: str = DEFAULT_EXTRACTION_USER_PROMPT
summarization_evaluation_system_prompt: str = (
@@ -179,9 +178,7 @@ def simple_word_tokenize(self, text: str) -> List[str]:

@weave.op
async def score(self, input: str, output: str, **kwargs: Any) -> dict:
- extract_task = asyncio.to_thread(
-     self.extract_entities, text=str(output)
- )
+ extract_task = asyncio.to_thread(self.extract_entities, text=str(output))
evaluate_task = asyncio.to_thread(
self.evaluate_summary, input=str(input), summary=str(output)
)
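The reflowed score lines above keep the same concurrency pattern: both blocking LLM calls are scheduled on worker threads and awaited together. A minimal standalone sketch of that pattern follows; the helper bodies and the final gather are placeholders standing in for the scorer's real entity-extraction and summary-evaluation calls, not its actual logic.

import asyncio
from typing import List


def extract_entities(text: str) -> List[str]:
    # Placeholder for the scorer's blocking entity-extraction LLM call.
    return sorted(set(text.lower().split()))


def evaluate_summary(input: str, summary: str) -> dict:
    # Placeholder for the scorer's blocking summary-grading LLM call.
    return {"grade": "ok", "input_chars": len(input), "summary_chars": len(summary)}


async def score(input: str, output: str) -> dict:
    # Push both blocking calls onto worker threads, then await them together.
    extract_task = asyncio.to_thread(extract_entities, text=str(output))
    evaluate_task = asyncio.to_thread(
        evaluate_summary, input=str(input), summary=str(output)
    )
    entities, evaluation = await asyncio.gather(extract_task, evaluate_task)
    return {"entities": entities, "evaluation": evaluation}


print(asyncio.run(score("a long news article ...", "a short summary ...")))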
