Commit: lint
tcapelle committed Oct 16, 2024
1 parent 633be15 commit 3ca802d
Showing 4 changed files with 19 additions and 23 deletions.
11 changes: 5 additions & 6 deletions weave/flow/scorers/hallucination_scorer.py
@@ -1,4 +1,3 @@
- import json
from typing import List

from pydantic import BaseModel, Field
@@ -111,18 +110,18 @@ class HallucinationFreeScorer(InstructorLLMScorer):
based on the input data.
Note:
- - The meaning of "hallucination" can vary from person to person, you will likely want to
+ - The meaning of "hallucination" can vary from person to person, you will likely want to
customize the `system_prompt` and `user_prompt` to fit your specific needs.
- - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
+ - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
provider's response; you will have to install the `instructor` python package to use it.
- The `score` method expects the input column from the dataset to be named "context". It will use
- this data as the ground-truth to check hallucinations against. If your dataset column has a
- different name, you can specify a different mapping using the `column_map` argument in the init
+ this data as the ground-truth to check hallucinations against. If your dataset column has a
+ different name, you can specify a different mapping using the `column_map` argument in the init
of HallucinationFreeScorer by passing `column_map={"context": "context"}`.
Attributes:
system_prompt (str): The prompt describing the task, defines what a "hallucination" is.
- user_prompt (str): The string template to pass the input and output data. The template must
+ user_prompt (str): The string template to pass the input and output data. The template must
contain placeholders for both `{input_data}` and `{output}`.
model_id (str): The LLM model name; depends on the LLM provider of the `client` being used.
temperature (float): LLM temperature setting.
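For orientation, a minimal usage sketch of the column_map behaviour described in the docstring above; the OpenAI client, model name, dataset column name, and the exact constructor and score signatures are assumptions for illustration rather than anything this commit establishes.

from openai import OpenAI

from weave.flow.scorers.hallucination_scorer import HallucinationFreeScorer

# Hypothetical dataset that stores its ground truth in a column named
# "ground_truth" instead of the default "context".
scorer = HallucinationFreeScorer(
    client=OpenAI(),  # assumed: any LLM client supported by the scorer
    model_id="gpt-4o",  # assumed model name
    column_map={"context": "ground_truth"},  # scorer argument -> dataset column
)

# Called directly (outside an Evaluation); the keyword names follow the
# docstring and are assumptions about the signature.
result = scorer.score(
    context="Paris is the capital of France.",
    output="The capital of France is Paris.",
)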
11 changes: 6 additions & 5 deletions weave/flow/scorers/llm_utils.py
@@ -19,11 +19,13 @@
if TYPE_CHECKING:
import instructor
from anthropic import Anthropic, AsyncAnthropic
+ from google.generativeai import GenerativeModel
from mistralai import Mistral
from openai import AsyncOpenAI, OpenAI
- from google.generativeai import GenerativeModel

- _LLM_CLIENTS = Union[OpenAI, AsyncOpenAI, Anthropic, AsyncAnthropic, Mistral, GenerativeModel]
+ _LLM_CLIENTS = Union[
+     OpenAI, AsyncOpenAI, Anthropic, AsyncAnthropic, Mistral, GenerativeModel
+ ]
else:
_LLM_CLIENTS = object

@@ -62,15 +64,14 @@ def instructor_client(client: _LLM_CLIENTS) -> "instructor.client":  # type: ignore
raise ValueError(f"Unsupported client type: {client_type}")


- import json
def create(client: instructor.client, *args, **kwargs) -> Any: # type: ignore
# gemini has slightly different argument namings...
# max_tokens -> max_output_tokens
if "generativemodel" in type(client.client).__name__.lower():
max_output_tokens = kwargs.pop("max_tokens")
temperature = kwargs.pop("temperature", None)
- _ = kwargs.pop("model") # model is baked in the client
- kwargs['generation_config'] = dict(
+ _ = kwargs.pop("model")  # model is baked in the client
+ kwargs["generation_config"] = dict(
max_output_tokens=max_output_tokens,
temperature=temperature,
)
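To make the Gemini branch of create() above concrete, here is a standalone re-statement of the kwargs rewrite it performs; _gemini_kwargs is a hypothetical helper name used only for this sketch, not part of the file.

from typing import Any


def _gemini_kwargs(**kwargs: Any) -> dict:
    # Re-states the rewrite shown above: Gemini expects max_output_tokens
    # inside generation_config, and the model is baked into the client.
    max_output_tokens = kwargs.pop("max_tokens")
    temperature = kwargs.pop("temperature", None)
    kwargs.pop("model")
    kwargs["generation_config"] = dict(
        max_output_tokens=max_output_tokens,
        temperature=temperature,
    )
    return kwargs


print(_gemini_kwargs(model="gemini-1.5-flash", max_tokens=256, temperature=0.0))
# {'generation_config': {'max_output_tokens': 256, 'temperature': 0.0}}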
3 changes: 1 addition & 2 deletions weave/flow/scorers/string_scorer.py
@@ -1,5 +1,4 @@
import re
- from typing import Callable, Union
+ from typing import Callable

from pydantic import Field, model_validator

17 changes: 7 additions & 10 deletions weave/flow/scorers/summarization_scorer.py
@@ -5,7 +5,7 @@

import weave
from weave.flow.scorers.llm_scorer import InstructorLLMScorer
- from weave.flow.scorers.llm_utils import create, OPENAI_DEFAULT_MODEL
+ from weave.flow.scorers.llm_utils import OPENAI_DEFAULT_MODEL, create

DEFAULT_EXTRACTION_SYSTEM_PROMPT = """
Given a <text>, extract all the unique entities from the text without repetition.
@@ -89,20 +89,20 @@ class SummarizationScorer(InstructorLLMScorer):
should look like.
Note:
- - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
+ - This Scorer uses the `InstructorLLMScorer` class to generate structured outputs from the LLM
provider's response; you will have to install the `instructor` python package to use it.
- The `score` method expects the input column from the dataset to be named "input". If your dataset
- column has a different name, you can specify a different mapping using the `column_map` argument in the
+ column has a different name, you can specify a different mapping using the `column_map` argument in the
init of SummarizationScorer by passing `column_map={"input": "news_article"}`.
Attributes:
- extraction_system_prompt (str): System prompt to extract the distinct entities in the input. Customising
+ extraction_system_prompt (str): System prompt to extract the distinct entities in the input. Customising
this can help ensure that the LLM identifies the `entities` that you care about.
extraction_prompt (str): Prompt template for entity extraction; must contain a `{text}` placeholder.
summarization_evaluation_system_prompt (str): System prompt defining how to evaluate the quality of a summary.
Asks an LLM to grade the summary from `poor`, `ok`, to `excellent` and provide a rationale for the grade.
- summarization_evaluation_prompt (str): Prompt template for summarization evaluation instruction; must contain
- `{input}` and `{summary}` placeholders.
+ summarization_evaluation_prompt (str): Prompt template for summarization evaluation instruction; must contain
+ `{input}` and `{summary}` placeholders.
entity_density_threshold (float): Threshold for determining if a summary is sufficiently entity-dense.
model_id (str): The LLM model name; depends on the LLM provider of the `client` being used.
temperature (float): LLM temperature setting.
@@ -119,7 +119,6 @@ class SummarizationScorer(InstructorLLMScorer):
Calculates summarization score and entity density score for the given input and output.
"""


extraction_system_prompt: str = DEFAULT_EXTRACTION_SYSTEM_PROMPT
extraction_prompt: str = DEFAULT_EXTRACTION_USER_PROMPT
summarization_evaluation_system_prompt: str = (
@@ -179,9 +178,7 @@ def simple_word_tokenize(self, text: str) -> List[str]:

@weave.op
async def score(self, input: str, output: str, **kwargs: Any) -> dict:
- extract_task = asyncio.to_thread(
-     self.extract_entities, text=str(output)
- )
+ extract_task = asyncio.to_thread(self.extract_entities, text=str(output))
evaluate_task = asyncio.to_thread(
self.evaluate_summary, input=str(input), summary=str(output)
)
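The reflowed score lines above keep the same concurrency pattern: both blocking LLM calls are scheduled on worker threads and awaited together. A minimal standalone sketch of that pattern follows; the helper bodies and the final gather are placeholders standing in for the scorer's real entity-extraction and summary-evaluation calls, not its actual logic.

import asyncio
from typing import List


def extract_entities(text: str) -> List[str]:
    # Placeholder for the scorer's blocking entity-extraction LLM call.
    return sorted(set(text.lower().split()))


def evaluate_summary(input: str, summary: str) -> dict:
    # Placeholder for the scorer's blocking summary-grading LLM call.
    return {"grade": "ok", "input_chars": len(input), "summary_chars": len(summary)}


async def score(input: str, output: str) -> dict:
    # Push both blocking calls onto worker threads, then await them together.
    extract_task = asyncio.to_thread(extract_entities, text=str(output))
    evaluate_task = asyncio.to_thread(
        evaluate_summary, input=str(input), summary=str(output)
    )
    entities, evaluation = await asyncio.gather(extract_task, evaluate_task)
    return {"entities": entities, "evaluation": evaluation}


print(asyncio.run(score("a long news article ...", "a short summary ...")))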
