move classification out

wandb · Oct 10, 2024 · 18d3d81 · 18d3d81
1 parent 200c115
commit 18d3d81
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 56 deletions.
diff --git a/weave/flow/scorer/__init__.py b/weave/flow/scorer/__init__.py
@@ -1,4 +1,22 @@
-from weave.flow.scorer.base_scorer import *
+from weave.flow.scorer.base_scorer import Scorer, auto_summarize, get_scorer_attributes
+from weave.flow.scorer.classification import MultiTaskBinaryClassificationF1
 from weave.flow.scorer.regex_scorer import RegexScorer
 from weave.flow.scorer.json_scorer import JSONScorer
 from weave.flow.scorer.llm_scorer import LLMScorer, EmbeddingScorer, OpenAIModerationScorer
+from weave.flow.scorer.pydantic_scorer import PydanticScorer
+from weave.flow.scorer.hallucination import HallucinationScorer
+
+
+__all__ = [
+    "Scorer",
+    "auto_summarize",
+    "get_scorer_attributes",
+    "MultiTaskBinaryClassificationF1",
+    "RegexScorer",
+    "JSONScorer",
+    "LLMScorer",
+    "EmbeddingScorer",
+    "OpenAIModerationScorer",
+    "PydanticScorer",
+    "HallucinationScorer",
+]
diff --git a/weave/flow/scorer/base_scorer.py b/weave/flow/scorer/base_scorer.py
@@ -1,4 +1,3 @@
-from collections import defaultdict
 from numbers import Number
 from typing import Any, Callable, Optional, Sequence, Tuple, Union
 
@@ -102,57 +101,4 @@ def get_scorer_attributes(
         summarize_fn = auto_summarize  # type: ignore
     else:
         raise ValueError(f"Unknown scorer type: {scorer}")
-    return (scorer_name, score_fn, summarize_fn)  # type: ignore
-
-
-def p_r_f1(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
-    # if any denom is zero, then zero. could use NaN instead...
-    precision: float = 0
-    if tp or fp:
-        precision = tp / (tp + fp)
-    recall: float = 0
-    if tp or fn:
-        recall = tp / (tp + fn)
-    f1: float = 0
-    if precision or recall:
-        f1 = 2 * (precision * recall) / (precision + recall)
-    return precision, recall, f1
-
-
-class MultiTaskBinaryClassificationF1(Scorer):
-    class_names: list[str]
-
-    @weave.op()
-    def summarize(self, score_rows: list) -> Optional[dict]:
-        result = {}
-        cols = transpose(score_rows)
-
-        for class_name in self.class_names:
-            col = cols[class_name]
-            tp = sum(r["correct"] and not r["negative"] for r in col)
-            fp = sum(not r["correct"] and not r["negative"] for r in col)
-            fn = sum(not r["correct"] and r["negative"] for r in col)
-            precision, recall, f1 = p_r_f1(tp, fp, fn)
-            result[class_name] = {"f1": f1, "precision": precision, "recall": recall}
-
-        return result
-
-    @weave.op()
-    def score(self, target: dict, model_output: Optional[dict]) -> dict:
-        result = {}
-        for class_name in self.class_names:
-            class_label = target.get(class_name)
-            class_model_output = model_output.get(class_name) if model_output else None
-            result[class_name] = {
-                "correct": class_label == class_model_output,
-                "negative": not class_model_output,
-            }
-        return result
-
-
-def transpose(rows: list[dict]) -> dict[str, list]:
-    cols = defaultdict(list)
-    for row in rows:
-        for k, v in row.items():
-            cols[k].append(v)
-    return dict(cols)
+    return (scorer_name, score_fn, summarize_fn)  # type: ignore
diff --git a/weave/flow/scorer/classification.py b/weave/flow/scorer/classification.py
@@ -0,0 +1,58 @@
+from collections import defaultdict
+from typing import Optional, Tuple
+
+import weave
+from weave.flow.scorer.base_scorer import Scorer
+
+
+def p_r_f1(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
+    """Compute precision, recall and F1 from confusion counts.
+
+    Args:
+        tp: Number of true positives.
+        fp: Number of false positives.
+        fn: Number of false negatives.
+
+    Returns:
+        A ``(precision, recall, f1)`` tuple. Any metric whose denominator
+        would be zero is reported as 0 rather than NaN.
+    """
+    # Guard each ratio on its own counts so an empty denominator yields 0.
+    precision: float = tp / (tp + fp) if (tp or fp) else 0
+    recall: float = tp / (tp + fn) if (tp or fn) else 0
+
+    # F1 is the harmonic mean; it is undefined when both inputs are zero.
+    if not (precision or recall):
+        return precision, recall, 0
+
+    harmonic_mean: float = 2 * (precision * recall) / (precision + recall)
+    return precision, recall, harmonic_mean
+
+
+class MultiTaskBinaryClassificationF1(Scorer):
+    """Scorer reporting precision, recall and F1 for several binary classes.
+
+    ``score`` evaluates one example and emits a ``correct`` / ``negative``
+    flag pair per class; ``summarize`` folds those per-example rows into
+    per-class precision, recall and F1.
+    """
+
+    # Names of the classes to score; used as keys into the target dict,
+    # the model output dict and the result dicts.
+    class_names: list[str]
+
+    @weave.op()
+    def summarize(self, score_rows: list) -> Optional[dict]:
+        """Aggregate per-example score rows into per-class metrics.
+
+        Args:
+            score_rows: Rows shaped like the return value of ``score``:
+                each maps a class name to a dict with ``"correct"`` and
+                ``"negative"`` entries.
+
+        Returns:
+            A dict mapping each class name to a dict with ``"f1"``,
+            ``"precision"`` and ``"recall"``. Classes with no rows get
+            zeros for all three metrics.
+        """
+        result = {}
+        cols = transpose(score_rows)
+
+        for class_name in self.class_names:
+            # With no score rows (or none mentioning this class) the
+            # transposed dict has no such key; fall back to an empty column
+            # instead of raising KeyError so the metrics come out as zero.
+            col = cols.get(class_name, [])
+            # "negative" means the model output for the class was falsy.
+            tp = sum(r["correct"] and not r["negative"] for r in col)
+            fp = sum(not r["correct"] and not r["negative"] for r in col)
+            fn = sum(not r["correct"] and r["negative"] for r in col)
+            precision, recall, f1 = p_r_f1(tp, fp, fn)
+            result[class_name] = {"f1": f1, "precision": precision, "recall": recall}
+
+        return result
+
+    @weave.op()
+    def score(self, target: dict, model_output: Optional[dict]) -> dict:
+        """Score a single example against its target labels.
+
+        Args:
+            target: Dict of expected values, looked up by class name.
+            model_output: Dict of predicted values looked up by class name;
+                a falsy value (e.g. ``None``) is treated as no prediction
+                for every class.
+
+        Returns:
+            A dict mapping each class name to
+            ``{"correct": bool, "negative": bool}``, where ``correct`` is
+            whether the prediction equals the target value and ``negative``
+            is whether the prediction is falsy.
+        """
+        result = {}
+        for class_name in self.class_names:
+            class_label = target.get(class_name)
+            # A missing output dict counts as a None prediction.
+            class_model_output = model_output.get(class_name) if model_output else None
+            result[class_name] = {
+                "correct": class_label == class_model_output,
+                "negative": not class_model_output,
+            }
+        return result
+
+
+def transpose(rows: list[dict]) -> dict[str, list]:
+    """Pivot a list of row dicts into a dict of column lists.
+
+    Args:
+        rows: Dicts whose values are grouped by key.
+
+    Returns:
+        A plain dict mapping every key seen in ``rows`` to the list of its
+        values, in row order. Rows lacking a key contribute nothing to that
+        key's list.
+    """
+    columns: dict[str, list] = {}
+    for record in rows:
+        for key, value in record.items():
+            # Create the column on first sight of the key, then extend it.
+            columns.setdefault(key, []).append(value)
+    return columns