Skip to content

Commit

Permalink
move classification out
Browse files: browse the repository at this point in the history
  • Loading branch information
tcapelle committed Oct 10, 2024
1 parent 200c115 commit 18d3d81
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 56 deletions.
20 changes: 19 additions & 1 deletion weave/flow/scorer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
from weave.flow.scorer.base_scorer import *
from weave.flow.scorer.base_scorer import Scorer, auto_summarize, get_scorer_attributes
from weave.flow.scorer.classification import MultiTaskBinaryClassificationF1
from weave.flow.scorer.regex_scorer import RegexScorer
from weave.flow.scorer.json_scorer import JSONScorer
from weave.flow.scorer.llm_scorer import LLMScorer, EmbeddingScorer, OpenAIModerationScorer
from weave.flow.scorer.pydantic_scorer import PydanticScorer
from weave.flow.scorer.hallucination import HallucinationScorer


# Explicit public API of the scorer package: each name listed here is one of
# the names imported at the top of this module.
__all__ = [
"Scorer",
"auto_summarize",
"get_scorer_attributes",
"MultiTaskBinaryClassificationF1",
"RegexScorer",
"JSONScorer",
"LLMScorer",
"EmbeddingScorer",
"OpenAIModerationScorer",
"PydanticScorer",
"HallucinationScorer",
]
56 changes: 1 addition & 55 deletions weave/flow/scorer/base_scorer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import defaultdict
from numbers import Number
from typing import Any, Callable, Optional, Sequence, Tuple, Union

@@ -102,57 +101,4 @@ def get_scorer_attributes(
summarize_fn = auto_summarize # type: ignore
else:
raise ValueError(f"Unknown scorer type: {scorer}")
return (scorer_name, score_fn, summarize_fn) # type: ignore


def p_r_f1(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
    """Compute precision, recall and F1 from raw confusion counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.

    Returns:
        A ``(precision, recall, f1)`` tuple. Any metric whose denominator
        would be zero is reported as 0 instead of raising or returning NaN.
    """
    # If any denominator is zero the metric is zero; NaN would be the
    # alternative convention.
    precision: float = 0
    if tp or fp:
        precision = tp / (tp + fp)
    recall: float = 0
    if tp or fn:
        recall = tp / (tp + fn)
    f1: float = 0
    # Guard the harmonic mean: precision + recall is zero when both are zero.
    if precision or recall:
        f1 = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1


class MultiTaskBinaryClassificationF1(Scorer):
    """Scorer reporting precision, recall and F1 for each named class.

    ``score`` records, per example and per class, whether the model output
    matched the target and whether the model output was falsy.
    ``summarize`` aggregates those records into per-class metrics.
    """

    # Names of the classes to score; used as keys into both the target and
    # the model output dictionaries.
    class_names: list[str]

    @weave.op()
    def summarize(self, score_rows: list) -> Optional[dict]:
        """Aggregate per-example score rows into per-class metrics.

        Args:
            score_rows: Rows shaped like the dictionaries returned by
                ``score`` (class name -> ``{"correct", "negative"}``).

        Returns:
            A dict mapping each class name to its ``f1``, ``precision`` and
            ``recall`` values.
        """
        result = {}
        cols = transpose(score_rows)

        for class_name in self.class_names:
            # NOTE(review): plain indexing, so a class absent from every row
            # presumably raises KeyError (e.g. empty score_rows) - confirm.
            col = cols[class_name]
            # "negative" means the model output for the class was falsy.
            tp = sum(r["correct"] and not r["negative"] for r in col)
            fp = sum(not r["correct"] and not r["negative"] for r in col)
            fn = sum(not r["correct"] and r["negative"] for r in col)
            precision, recall, f1 = p_r_f1(tp, fp, fn)
            result[class_name] = {"f1": f1, "precision": precision, "recall": recall}

        return result

    @weave.op()
    def score(self, target: dict, model_output: Optional[dict]) -> dict:
        """Score one example against its target labels.

        Args:
            target: Mapping of class name to expected label.
            model_output: Mapping of class name to predicted label; a falsy
                value (``None`` or empty) is treated as no prediction for
                every class.

        Returns:
            A dict mapping each class name to ``{"correct": bool,
            "negative": bool}``.
        """
        result = {}
        for class_name in self.class_names:
            class_label = target.get(class_name)
            class_model_output = model_output.get(class_name) if model_output else None
            result[class_name] = {
                "correct": class_label == class_model_output,
                "negative": not class_model_output,
            }
        return result


def transpose(rows: list[dict]) -> dict[str, list]:
    """Turn a list of row dictionaries into a dictionary of columns.

    Args:
        rows: Row dictionaries; rows may have different key sets.

    Returns:
        A plain dict mapping each key seen in any row to the list of values
        for that key, in row order. Rows lacking a key contribute nothing to
        that key's list, so columns may differ in length.
    """
    cols = defaultdict(list)
    for row in rows:
        for k, v in row.items():
            cols[k].append(v)
    # Return a regular dict so missing keys raise instead of being created.
    return dict(cols)
return (scorer_name, score_fn, summarize_fn) # type: ignore
58 changes: 58 additions & 0 deletions weave/flow/scorer/classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from collections import defaultdict
from typing import Optional, Tuple

import weave
from weave.flow.scorer.base_scorer import Scorer


def p_r_f1(tp: int, fp: int, fn: int) -> Tuple[float, float, float]:
    """Compute precision, recall and F1 from raw confusion counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.

    Returns:
        A ``(precision, recall, f1)`` tuple. Any metric whose denominator
        would be zero is reported as 0 instead of raising or returning NaN.
    """
    # If any denominator is zero the metric is zero; NaN would be the
    # alternative convention.
    precision: float = 0
    if tp or fp:
        precision = tp / (tp + fp)
    recall: float = 0
    if tp or fn:
        recall = tp / (tp + fn)
    f1: float = 0
    # Guard the harmonic mean: precision + recall is zero when both are zero.
    if precision or recall:
        f1 = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1


class MultiTaskBinaryClassificationF1(Scorer):
    """Scorer reporting precision, recall and F1 for each named class.

    ``score`` records, per example and per class, whether the model output
    matched the target and whether the model output was falsy.
    ``summarize`` aggregates those records into per-class metrics.
    """

    # Names of the classes to score; used as keys into both the target and
    # the model output dictionaries.
    class_names: list[str]

    @weave.op()
    def summarize(self, score_rows: list) -> Optional[dict]:
        """Aggregate per-example score rows into per-class metrics.

        Args:
            score_rows: Rows shaped like the dictionaries returned by
                ``score`` (class name -> ``{"correct", "negative"}``).

        Returns:
            A dict mapping each class name to its ``f1``, ``precision`` and
            ``recall`` values.
        """
        result = {}
        cols = transpose(score_rows)

        for class_name in self.class_names:
            # NOTE(review): plain indexing, so a class absent from every row
            # presumably raises KeyError (e.g. empty score_rows) - confirm.
            col = cols[class_name]
            # "negative" means the model output for the class was falsy.
            tp = sum(r["correct"] and not r["negative"] for r in col)
            fp = sum(not r["correct"] and not r["negative"] for r in col)
            fn = sum(not r["correct"] and r["negative"] for r in col)
            precision, recall, f1 = p_r_f1(tp, fp, fn)
            result[class_name] = {"f1": f1, "precision": precision, "recall": recall}

        return result

    @weave.op()
    def score(self, target: dict, model_output: Optional[dict]) -> dict:
        """Score one example against its target labels.

        Args:
            target: Mapping of class name to expected label.
            model_output: Mapping of class name to predicted label; a falsy
                value (``None`` or empty) is treated as no prediction for
                every class.

        Returns:
            A dict mapping each class name to ``{"correct": bool,
            "negative": bool}``.
        """
        result = {}
        for class_name in self.class_names:
            class_label = target.get(class_name)
            class_model_output = model_output.get(class_name) if model_output else None
            result[class_name] = {
                "correct": class_label == class_model_output,
                "negative": not class_model_output,
            }
        return result


def transpose(rows: list[dict]) -> dict[str, list]:
    """Turn a list of row dictionaries into a dictionary of columns.

    Args:
        rows: Row dictionaries; rows may have different key sets.

    Returns:
        A plain dict mapping each key seen in any row to the list of values
        for that key, in row order. Rows lacking a key contribute nothing to
        that key's list, so columns may differ in length.
    """
    cols = defaultdict(list)
    for row in rows:
        for k, v in row.items():
            cols[k].append(v)
    # Return a regular dict so missing keys raise instead of being created.
    return dict(cols)

0 comments on commit 18d3d81

Please sign in to comment.