Skip to content

Commit

Permalink
feat(js/plugins/checks): checks evaluator plugin returns multiple scores (#1370)
Browse files Browse the repository at this point in the history
  • Loading branch information
HunterHeston authored Dec 11, 2024
1 parent afcd77b commit a265ec1
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 29 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ js/testapps/firebase-functions-sample1/.firebase
js/testapps/firebase-functions-sample1/.firebaserc
js/testapps/firebase-functions-sample1/public/bundle.js
js/testapps/firebase-functions-sample1/public/config.js
.genkit
js/**/.genkit
samples/**/.genkit
go/**/.genkit
Expand Down
11 changes: 3 additions & 8 deletions js/plugins/checks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,11 @@ Create a JSON file with the data you want to test. Add as many test cases as you

```

### Run the evaluators
### Run the evaluator

```bash
# Run just the DANGEROUS_CONTENT classifier.
genkit eval:run test-dataset.json --evaluators=checks/dangerous_content
```

```bash
# Run all classifiers.
genkit eval:run test-dataset.json --evaluators=checks/dangerous_content,checks/pii_soliciting_reciting,checks/harassment,checks/sexually_explicit,checks/hate_speech,checks/medical_info,checks/violence_and_gore,checks/obscenity_and_profanity
# Run all configured classifiers.
genkit eval:run test-dataset.json --evaluators=checks/guardrails
```

### View the results
Expand Down
43 changes: 22 additions & 21 deletions js/plugins/checks/src/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ export function checksEvaluators(
auth: GoogleAuth,
metrics: ChecksEvaluationMetric[],
projectId: string
): EvaluatorAction[] {
): EvaluatorAction {
const policy_configs: ChecksEvaluationMetricConfig[] = metrics.map(
(metric) => {
const metricType = isConfig(metric) ? metric.type : metric;
Expand All @@ -77,11 +77,7 @@ export function checksEvaluators(
}
);

const evaluators = policy_configs.map((policy_config) => {
return createPolicyEvaluator(projectId, auth, ai, policy_config);
});

return evaluators;
return createPolicyEvaluator(projectId, auth, ai, policy_configs);
}

function isConfig(
Expand All @@ -104,15 +100,13 @@ function createPolicyEvaluator(
projectId: string,
auth: GoogleAuth,
ai: Genkit,
policy_config: ChecksEvaluationMetricConfig
policy_config: ChecksEvaluationMetricConfig[]
): EvaluatorAction {
const policyType = policy_config.type as string;

return ai.defineEvaluator(
{
name: `checks/${policyType.toLowerCase()}`,
displayName: policyType,
definition: `Evaluates text against the Checks ${policyType} policy.`,
name: 'checks/guardrails',
displayName: 'checks/guardrails',
definition: `Evaluates input text against the Checks ${policy_config.map((policy) => policy.type)} policies.`,
},
async (datapoint: BaseEvalDataPoint) => {
const partialRequest = {
Expand All @@ -121,10 +115,12 @@ function createPolicyEvaluator(
content: datapoint.output as string,
},
},
policies: {
policy_type: policy_config.type,
threshold: policy_config.threshold,
},
policies: policy_config.map((config) => {
return {
policy_type: config.type,
threshold: config.threshold,
};
}),
};

const response = await checksEvalInstance(
Expand All @@ -134,13 +130,18 @@ function createPolicyEvaluator(
ResponseSchema
);

return {
evaluation: {
score: response.policyResults[0].score,
const evaluationResults = response.policyResults.map((result) => {
return {
id: result.policyType,
score: result.score,
details: {
reasoning: response.policyResults[0].violationResult,
reasoning: `Status ${result.violationResult}`,
},
},
};
});

return {
evaluation: evaluationResults,
testCaseId: datapoint.testCaseId,
};
}
Expand Down

0 comments on commit a265ec1

Please sign in to comment.