From f59b5acb4229fd8cddb048618e1f8050f0c8f334 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Tue, 7 Jan 2025 14:36:07 -0700 Subject: [PATCH 1/6] fix: updates based on live API --- src/agentTester.ts | 34 +++++++++---------- src/index.ts | 2 +- test/agentTester.test.ts | 20 +++++------ ...ions_runs_4KBSM000000003F4AQ_results.json} | 0 4 files changed, 28 insertions(+), 28 deletions(-) rename test/mocks/{einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json => einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json} (100%) diff --git a/src/agentTester.ts b/src/agentTester.ts index 41c9dd6..f97c64c 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -53,7 +53,7 @@ export type TestCaseResult = { }>; }; -export type AgentTestDetailsResponse = { +export type AgentTestResultsResponse = { status: TestStatus; startTime: string; endTime?: string; @@ -106,7 +106,7 @@ export class AgentTester { * * @param {string} jobId * @param {Duration} timeout - * @returns {Promise} + * @returns {Promise} */ public async poll( jobId: string, @@ -117,17 +117,17 @@ export class AgentTester { } = { timeout: Duration.minutes(5), } - ): Promise { + ): Promise { const frequency = env.getNumber('SF_AGENT_TEST_POLLING_FREQUENCY_MS', 1000); const lifecycle = Lifecycle.getInstance(); const client = await PollingClient.create({ poll: async (): Promise => { // NOTE: we don't actually need to call the status API here since all the same information is present on the // details API. We could just call the details API and check the status there. - const [detailsResponse, statusResponse] = await Promise.all([this.details(jobId), this.status(jobId)]); - const totalTestCases = detailsResponse.testSet.testCases.length; - const failingTestCases = detailsResponse.testSet.testCases.filter((tc) => tc.status === 'ERROR').length; - const passingTestCases = detailsResponse.testSet.testCases.filter( + const [resultsResponse, statusResponse] = await Promise.all([this.results(jobId), this.status(jobId)]); + const totalTestCases = resultsResponse.testSet.testCases.length; + const failingTestCases = resultsResponse.testSet.testCases.filter((tc) => tc.status === 'ERROR').length; + const passingTestCases = resultsResponse.testSet.testCases.filter( (tc) => tc.status === 'COMPLETED' && tc.expectationResults.every((r) => r.result === 'Passed') ).length; @@ -139,7 +139,7 @@ export class AgentTester { failingTestCases, passingTestCases, }); - return { payload: detailsResponse, completed: true }; + return { payload: resultsResponse, completed: true }; } await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { @@ -155,19 +155,19 @@ export class AgentTester { timeout, }); - return client.subscribe(); + return client.subscribe(); } /** * Request test run details * * @param {string} jobId - * @returns {Promise} + * @returns {Promise} */ - public async details(jobId: string): Promise { - const url = `/einstein/ai-evaluations/runs/${jobId}/details`; + public async results(jobId: string): Promise { + const url = `/einstein/ai-evaluations/runs/${jobId}/results`; - return this.maybeMock.request('GET', url); + return this.maybeMock.request('GET', url); } /** @@ -246,7 +246,7 @@ function makeSimpleTable(data: Record, title: string): string { return `${title}\n${table}`; } -export async function humanFormat(details: AgentTestDetailsResponse): Promise { +export async function humanFormat(details: AgentTestResultsResponse): Promise { const { Ux } = await import('@salesforce/sf-plugins-core'); const ux = new Ux(); @@ -312,11 +312,11 @@ export async function humanFormat(details: AgentTestDetailsResponse): Promise { +export async function jsonFormat(details: AgentTestResultsResponse): Promise { return Promise.resolve(JSON.stringify(details, null, 2)); } -export async function junitFormat(details: AgentTestDetailsResponse): Promise { +export async function junitFormat(details: AgentTestResultsResponse): Promise { // eslint-disable-next-line import/no-extraneous-dependencies const { XMLBuilder } = await import('fast-xml-parser'); const builder = new XMLBuilder({ @@ -369,7 +369,7 @@ export async function junitFormat(details: AgentTestDetailsResponse): Promise\n${suites}`.trim(); } -export async function tapFormat(details: AgentTestDetailsResponse): Promise { +export async function tapFormat(details: AgentTestResultsResponse): Promise { const lines: string[] = []; let expectationCount = 0; for (const testCase of details.testSet.testCases) { diff --git a/src/index.ts b/src/index.ts index 3a5aa53..0d8f5ca 100644 --- a/src/index.ts +++ b/src/index.ts @@ -20,7 +20,7 @@ export { jsonFormat, junitFormat, tapFormat, - type AgentTestDetailsResponse, + type AgentTestResultsResponse, type AgentTestStartResponse, type AgentTestStatusResponse, type TestCaseResult, diff --git a/test/agentTester.test.ts b/test/agentTester.test.ts index 205a780..6d1e2c0 100644 --- a/test/agentTester.test.ts +++ b/test/agentTester.test.ts @@ -8,7 +8,7 @@ import { readFile } from 'node:fs/promises'; import { expect } from 'chai'; import { MockTestOrgData, TestContext } from '@salesforce/core/testSetup'; import { Connection } from '@salesforce/core'; -import { AgentTestDetailsResponse, AgentTester, humanFormat, junitFormat, tapFormat } from '../src/agentTester'; +import { AgentTestResultsResponse, AgentTester, humanFormat, junitFormat, tapFormat } from '../src/agentTester'; describe('AgentTester', () => { const $$ = new TestContext(); @@ -62,11 +62,11 @@ describe('AgentTester', () => { }); }); - describe('details', () => { - it('should return details of completed test run', async () => { + describe('results', () => { + it('should return results of completed test run', async () => { const tester = new AgentTester(connection); await tester.start('suiteId'); - const output = await tester.details('4KBSM000000003F4AQ'); + const output = await tester.results('4KBSM000000003F4AQ'); // TODO: make this assertion more meaningful expect(output).to.be.ok; }); @@ -84,8 +84,8 @@ describe('AgentTester', () => { describe('humanFormat', () => { it('should transform test results to human readable format', async () => { - const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json', 'utf8'); - const input = JSON.parse(raw) as AgentTestDetailsResponse; + const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); + const input = JSON.parse(raw) as AgentTestResultsResponse; const output = await humanFormat(input); expect(output).to.be.ok; }); @@ -93,8 +93,8 @@ describe('humanFormat', () => { describe('junitFormatter', () => { it('should transform test results to JUnit format', async () => { - const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json', 'utf8'); - const input = JSON.parse(raw) as AgentTestDetailsResponse; + const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); + const input = JSON.parse(raw) as AgentTestResultsResponse; const output = await junitFormat(input); expect(output).to.deep.equal(` @@ -112,8 +112,8 @@ describe('junitFormatter', () => { describe('tapFormatter', () => { it('should transform test results to TAP format', async () => { - const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json', 'utf8'); - const input = JSON.parse(raw) as AgentTestDetailsResponse; + const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); + const input = JSON.parse(raw) as AgentTestResultsResponse; const output = await tapFormat(input); expect(output).to.deep.equal(`Tap Version 14 1..6 diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json similarity index 100% rename from test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json rename to test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json From e198c77a6287550571593d04aa548b0db042e885 Mon Sep 17 00:00:00 2001 From: svc-cli-bot Date: Tue, 7 Jan 2025 21:37:42 +0000 Subject: [PATCH 2/6] chore(release): 0.5.10-dev.0 [skip ci] --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index e025492..4abfbac 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/agents", "description": "Client side APIs for working with Salesforce agents", - "version": "0.5.9", + "version": "0.5.10-dev.0", "license": "BSD-3-Clause", "author": "Salesforce", "main": "lib/index", From 7757c162313559169ccc31fb88367114bf8b8b86 Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Fri, 10 Jan 2025 09:56:56 -0700 Subject: [PATCH 3/6] fix: api updates --- CONTRIBUTING.md | 22 +++----- src/agentTester.ts | 53 +++++++++++-------- ...tions_runs_4KBSM000000003F4AQ_results.json | 20 ++++--- 3 files changed, 47 insertions(+), 48 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73226ad..5f994e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,16 +1,16 @@ ## Contributing 1. Familiarize yourself with the codebase by reading the docs, in - particular the [developing](./contributing/developing.md) doc. -1. Create a new issue before starting your project so that we can keep track of + particular the [developing](./developing.md) doc. +2. Create a new issue before starting your project so that we can keep track of what you're trying to add/fix. That way, we can also offer suggestions or let you know if there is already an effort in progress. -1. Fork this repository. -1. Set up your environment using the information in the [developing](./contributing/developing.md) doc. -1. Create a _topic_ branch in your fork based on the correct branch (usually the **develop** branch, see [Branches section](./contributing/developing.md)). Note: this step is recommended but technically not required if contributing using a fork. -1. Edit the code in your fork. -1. Sign the CLA (see [CLA](#cla)). -1. Send us a pull request when you're done. We'll review your code, suggest any +3. Fork this repository. +4. Set up your environment using the information in the [developing](./developing.md) doc. +5. Create a _topic_ branch in your fork based on the correct branch (usually the **develop** branch, see [Branches section](./developing.md)). Note: this step is recommended but technically not required if contributing using a fork. +6. Edit the code in your fork. +7. Sign the CLA (see [CLA](#cla)). +8. Send us a pull request when you're done. We'll review your code, suggest any needed changes, and merge it in. ## Pull Requests @@ -31,9 +31,3 @@ Agreement. You can do so by going to . ### Merging Pull Requests Pull request merging is restricted to squash and merge only. - -## Helpful Resources - -- All of the files in the [contributing](./contributing) folder have useful information, particularly the previously-mentioned [developing](./contributing/developing.md) doc. -- The [Source-Deploy-Retrieve Handbook](./HANDBOOK.md) contains an overview of all of the code in this project. This easy-to-read document can serve as an introduction and overview of the code and concepts, or as a reference for what a given module accomplishes and why it was designed. -- The [API documentation](https://forcedotcom.github.io/source-deploy-retrieve/) has details on using the classes and methods. diff --git a/src/agentTester.ts b/src/agentTester.ts index f97c64c..9b307a0 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -25,14 +25,14 @@ export type AgentTestStatusResponse = { export type TestCaseResult = { status: TestStatus; - number: string; utterance: string; + utterence: string; startTime: string; endTime?: string; generatedData: { type: 'AGENT'; actionsSequence: string[]; - outcome: 'Success' | 'Failure'; + outcome: string; topic: string; inputTokensCount: string; outputTokensCount: string; @@ -42,7 +42,7 @@ export type TestCaseResult = { actualValue: string; expectedValue: string; score: number; - result: 'Passed' | 'Failed'; + result: 'PASS' | 'FAIL'; metricLabel: 'Accuracy' | 'Precision'; metricExplainability: string; status: TestStatus; @@ -122,19 +122,19 @@ export class AgentTester { const lifecycle = Lifecycle.getInstance(); const client = await PollingClient.create({ poll: async (): Promise => { - // NOTE: we don't actually need to call the status API here since all the same information is present on the - // details API. We could just call the details API and check the status there. - const [resultsResponse, statusResponse] = await Promise.all([this.results(jobId), this.status(jobId)]); + const resultsResponse = await this.results(jobId); const totalTestCases = resultsResponse.testSet.testCases.length; - const failingTestCases = resultsResponse.testSet.testCases.filter((tc) => tc.status === 'ERROR').length; const passingTestCases = resultsResponse.testSet.testCases.filter( - (tc) => tc.status === 'COMPLETED' && tc.expectationResults.every((r) => r.result === 'Passed') + (tc) => tc.status === 'COMPLETED' && tc.expectationResults.every((r) => r.result === 'PASS') + ).length; + const failingTestCases = resultsResponse.testSet.testCases.filter( + (tc) => ['ERROR', 'COMPLETED'].includes(tc.status) && tc.expectationResults.some((r) => r.result === 'FAIL') ).length; - if (statusResponse.status.toLowerCase() === 'completed') { + if (resultsResponse.status.toLowerCase() === 'completed') { await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { jobId, - status: statusResponse.status, + status: resultsResponse.status, totalTestCases, failingTestCases, passingTestCases, @@ -144,7 +144,7 @@ export class AgentTester { await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { jobId, - status: statusResponse.status, + status: resultsResponse.status, totalTestCases, failingTestCases, passingTestCases, @@ -252,13 +252,16 @@ export async function humanFormat(details: AgentTestResultsResponse): Promise ({ test: humanFriendlyName(r.name), - result: r.result === 'Passed' ? ansis.green('Pass') : ansis.red('Fail'), + result: r.result === 'PASS' ? ansis.green('Pass') : ansis.red('Fail'), expected: r.expectedValue, actual: r.actualValue, })), @@ -269,19 +272,19 @@ export async function humanFormat(details: AgentTestResultsResponse): Promise { const topic = tc.expectationResults.find((r) => r.name === 'topic_sequence_match'); - return topic?.result === 'Passed' ? acc + 1 : acc; + return topic?.result === 'PASS' ? acc + 1 : acc; }, 0); const topicPassPercent = (topicPassCount / details.testSet.testCases.length) * 100; const actionPassCount = details.testSet.testCases.reduce((acc, tc) => { const action = tc.expectationResults.find((r) => r.name === 'action_sequence_match'); - return action?.result === 'Passed' ? acc + 1 : acc; + return action?.result === 'PASS' ? acc + 1 : acc; }, 0); const actionPassPercent = (actionPassCount / details.testSet.testCases.length) * 100; const outcomePassCount = details.testSet.testCases.reduce((acc, tc) => { const outcome = tc.expectationResults.find((r) => r.name === 'bot_response_rating'); - return outcome?.result === 'Passed' ? acc + 1 : acc; + return outcome?.result === 'PASS' ? acc + 1 : acc; }, 0); const outcomePassPercent = (outcomePassCount / details.testSet.testCases.length) * 100; @@ -300,9 +303,9 @@ export async function humanFormat(details: AgentTestResultsResponse): Promise tc.status === 'ERROR'); const failedTestCasesObj = Object.fromEntries( Object.entries(failedTestCases).map(([, tc]) => [ - `Test Case #${tc.number}`, + `Test Case #${failedTestCases.indexOf(tc) + 1}`, tc.expectationResults - .filter((r) => r.result === 'Failed') + .filter((r) => r.result === 'FAIL') .map((r) => humanFriendlyName(r.name)) .join(', '), ]) @@ -326,7 +329,9 @@ export async function junitFormat(details: AgentTestResultsResponse): Promise tc.status === 'ERROR').length; + const failureCount = details.testSet.testCases.filter( + (tc) => ['ERROR', 'COMPLETED'].includes(tc.status) && tc.expectationResults.some((r) => r.result === 'FAIL') + ).length; const time = details.testSet.testCases.reduce((acc, tc) => { if (tc.endTime && tc.startTime) { return acc + new Date(tc.endTime).getTime() - new Date(tc.startTime).getTime(); @@ -351,12 +356,12 @@ export async function junitFormat(details: AgentTestResultsResponse): Promise { - if (r.result === 'Failed') { + if (r.result === 'FAIL') { return { $message: r.errorMessage ?? 'Unknown error', $name: r.name }; } }) @@ -374,9 +379,11 @@ export async function tapFormat(details: AgentTestResultsResponse): Promise Date: Tue, 14 Jan 2025 10:40:02 -0700 Subject: [PATCH 4/6] chore: clean up --- src/agentTester.ts | 55 ++++++++++++------- src/index.ts | 5 +- test/agentTester.test.ts | 14 ++--- ...tions_runs_4KBSM000000003F4AQ_results.json | 4 +- 4 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/agentTester.ts b/src/agentTester.ts index 9b307a0..32129ba 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -26,7 +26,6 @@ export type AgentTestStatusResponse = { export type TestCaseResult = { status: TestStatus; utterance: string; - utterence: string; startTime: string; endTime?: string; generatedData: { @@ -246,7 +245,25 @@ function makeSimpleTable(data: Record, title: string): string { return `${title}\n${table}`; } -export async function humanFormat(details: AgentTestResultsResponse): Promise { +export async function convertTestResultsToFormat( + results: AgentTestResultsResponse, + format: 'human' | 'json' | 'junit' | 'tap' +): Promise { + switch (format) { + case 'human': + return humanFormat(results); + case 'json': + return jsonFormat(results); + case 'junit': + return junitFormat(results); + case 'tap': + return tapFormat(results); + default: + throw new Error(`Unsupported format: ${format as string}`); + } +} + +async function humanFormat(details: AgentTestResultsResponse): Promise { const { Ux } = await import('@salesforce/sf-plugins-core'); const ux = new Ux(); @@ -254,9 +271,7 @@ export async function humanFormat(details: AgentTestResultsResponse): Promise ({ @@ -315,11 +330,11 @@ export async function humanFormat(details: AgentTestResultsResponse): Promise { - return Promise.resolve(JSON.stringify(details, null, 2)); +async function jsonFormat(results: AgentTestResultsResponse): Promise { + return Promise.resolve(JSON.stringify(results, null, 2)); } -export async function junitFormat(details: AgentTestResultsResponse): Promise { +async function junitFormat(results: AgentTestResultsResponse): Promise { // eslint-disable-next-line import/no-extraneous-dependencies const { XMLBuilder } = await import('fast-xml-parser'); const builder = new XMLBuilder({ @@ -328,11 +343,11 @@ export async function junitFormat(details: AgentTestResultsResponse): Promise ['ERROR', 'COMPLETED'].includes(tc.status) && tc.expectationResults.some((r) => r.result === 'FAIL') ).length; - const time = details.testSet.testCases.reduce((acc, tc) => { + const time = results.testSet.testCases.reduce((acc, tc) => { if (tc.endTime && tc.startTime) { return acc + new Date(tc.endTime).getTime() - new Date(tc.startTime).getTime(); } @@ -341,22 +356,22 @@ export async function junitFormat(details: AgentTestResultsResponse): Promise { + testsuite: results.testSet.testCases.map((testCase) => { const testCaseTime = testCase.endTime ? new Date(testCase.endTime).getTime() - new Date(testCase.startTime).getTime() : 0; return { - $name: `${details.testSet.name}.${details.testSet.testCases.indexOf(testCase) + 1}`, + $name: `${results.testSet.name}.${results.testSet.testCases.indexOf(testCase) + 1}`, $time: testCaseTime, $assertions: testCase.expectationResults.length, failure: testCase.expectationResults @@ -374,15 +389,15 @@ export async function junitFormat(details: AgentTestResultsResponse): Promise\n${suites}`.trim(); } -export async function tapFormat(details: AgentTestResultsResponse): Promise { +async function tapFormat(results: AgentTestResultsResponse): Promise { const lines: string[] = []; let expectationCount = 0; - for (const testCase of details.testSet.testCases) { + for (const testCase of results.testSet.testCases) { for (const result of testCase.expectationResults) { const status = result.result === 'PASS' ? 'ok' : 'not ok'; expectationCount++; lines.push( - `${status} ${expectationCount} ${details.testSet.name}.${details.testSet.testCases.indexOf(testCase) + 1}` + `${status} ${expectationCount} ${results.testSet.name}.${results.testSet.testCases.indexOf(testCase) + 1}` ); if (status === 'not ok') { lines.push(' ---'); diff --git a/src/index.ts b/src/index.ts index 0d8f5ca..60534a1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,10 +16,7 @@ export { export { Agent, AgentCreateLifecycleStages } from './agent'; export { AgentTester, - humanFormat, - jsonFormat, - junitFormat, - tapFormat, + convertTestResultsToFormat, type AgentTestResultsResponse, type AgentTestStartResponse, type AgentTestStatusResponse, diff --git a/test/agentTester.test.ts b/test/agentTester.test.ts index 6d1e2c0..c8ed309 100644 --- a/test/agentTester.test.ts +++ b/test/agentTester.test.ts @@ -8,7 +8,7 @@ import { readFile } from 'node:fs/promises'; import { expect } from 'chai'; import { MockTestOrgData, TestContext } from '@salesforce/core/testSetup'; import { Connection } from '@salesforce/core'; -import { AgentTestResultsResponse, AgentTester, humanFormat, junitFormat, tapFormat } from '../src/agentTester'; +import { AgentTestResultsResponse, AgentTester, convertTestResultsToFormat } from '../src/agentTester'; describe('AgentTester', () => { const $$ = new TestContext(); @@ -82,20 +82,20 @@ describe('AgentTester', () => { }); }); -describe('humanFormat', () => { +describe('human format', () => { it('should transform test results to human readable format', async () => { const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); const input = JSON.parse(raw) as AgentTestResultsResponse; - const output = await humanFormat(input); + const output = await convertTestResultsToFormat(input, 'human'); expect(output).to.be.ok; }); }); -describe('junitFormatter', () => { +describe('junit formatter', () => { it('should transform test results to JUnit format', async () => { const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); const input = JSON.parse(raw) as AgentTestResultsResponse; - const output = await junitFormat(input); + const output = await convertTestResultsToFormat(input, 'junit'); expect(output).to.deep.equal(` @@ -110,11 +110,11 @@ describe('junitFormatter', () => { }); }); -describe('tapFormatter', () => { +describe('tap formatter', () => { it('should transform test results to TAP format', async () => { const raw = await readFile('./test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json', 'utf8'); const input = JSON.parse(raw) as AgentTestResultsResponse; - const output = await tapFormat(input); + const output = await convertTestResultsToFormat(input, 'tap'); expect(output).to.deep.equal(`Tap Version 14 1..6 ok 1 CRM_Sanity_v1.1 diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json index a8a42a4..4e00471 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json @@ -9,7 +9,7 @@ "testCases": [ { "status": "COMPLETED", - "utterence": "Summarize account Acme", + "utterance": "Summarize account Acme", "startTime": "2024-11-28T12:00:10Z", "endTime": "2024-11-28T12:00:20Z", "generatedData": { @@ -68,7 +68,7 @@ { "status": "COMPLETED", "startTime": "2024-11-28T12:00:30Z", - "utterence": "Summarize the open cases and Activities of acme from sep to nov 2024", + "utterance": "Summarize the open cases and Activities of acme from sep to nov 2024", "endTime": "2024-11-28T12:00:40Z", "generatedData": { "type": "AGENT", From 7a1641c5a53e2abb1e15372405a20415f66bf41e Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Fri, 17 Jan 2025 09:47:24 -0700 Subject: [PATCH 5/6] chore: more api changes --- src/agentTester.ts | 64 ++++++++++++------- src/maybe-mock.ts | 2 +- test/agentTester.test.ts | 4 +- .../1.json | 2 +- .../2.json | 2 +- .../3.json | 2 +- ...tions_runs_4KBSM000000003F4AQ_results.json | 6 +- 7 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/agentTester.ts b/src/agentTester.ts index 32129ba..da8fefe 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -9,7 +9,7 @@ import { Duration, env } from '@salesforce/kit'; import ansis from 'ansis'; import { MaybeMock } from './maybe-mock'; -export type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; +export type TestStatus = 'New' | 'InProgress' | 'Completed' | 'Error'; export type AgentTestStartResponse = { aiEvaluationId: string; @@ -41,7 +41,7 @@ export type TestCaseResult = { actualValue: string; expectedValue: string; score: number; - result: 'PASS' | 'FAIL'; + result: 'PASS' | 'FAILURE'; metricLabel: 'Accuracy' | 'Precision'; metricExplainability: string; status: TestStatus; @@ -121,16 +121,38 @@ export class AgentTester { const lifecycle = Lifecycle.getInstance(); const client = await PollingClient.create({ poll: async (): Promise => { - const resultsResponse = await this.results(jobId); - const totalTestCases = resultsResponse.testSet.testCases.length; - const passingTestCases = resultsResponse.testSet.testCases.filter( - (tc) => tc.status === 'COMPLETED' && tc.expectationResults.every((r) => r.result === 'PASS') - ).length; - const failingTestCases = resultsResponse.testSet.testCases.filter( - (tc) => ['ERROR', 'COMPLETED'].includes(tc.status) && tc.expectationResults.some((r) => r.result === 'FAIL') - ).length; - - if (resultsResponse.status.toLowerCase() === 'completed') { + const statusResponse = await this.status(jobId); + // eslint-disable-next-line no-console + console.log('*'.repeat(process.stdout.columns)); + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-var-requires + const util = require('node:util'); + // eslint-disable-next-line no-console, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call + console.log(util.inspect(statusResponse, { depth: 6 })); + // eslint-disable-next-line no-console + console.log('*'.repeat(process.stdout.columns)); + if (statusResponse.status.toLowerCase() !== 'new') { + const resultsResponse = await this.results(jobId); + const totalTestCases = resultsResponse.testSet.testCases.length; + const passingTestCases = resultsResponse.testSet.testCases.filter( + (tc) => tc.status.toLowerCase() === 'completed' && tc.expectationResults.every((r) => r.result === 'PASS') + ).length; + const failingTestCases = resultsResponse.testSet.testCases.filter( + (tc) => + ['error', 'completed'].includes(tc.status.toLowerCase()) && + tc.expectationResults.some((r) => r.result === 'FAILURE') + ).length; + + if (resultsResponse.status.toLowerCase() === 'completed') { + await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { + jobId, + status: resultsResponse.status, + totalTestCases, + failingTestCases, + passingTestCases, + }); + return { payload: resultsResponse, completed: true }; + } + await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { jobId, status: resultsResponse.status, @@ -138,16 +160,8 @@ export class AgentTester { failingTestCases, passingTestCases, }); - return { payload: resultsResponse, completed: true }; } - await lifecycle.emit('AGENT_TEST_POLLING_EVENT', { - jobId, - status: resultsResponse.status, - totalTestCases, - failingTestCases, - passingTestCases, - }); return { completed: false }; }, frequency: Duration.milliseconds(frequency), @@ -315,12 +329,12 @@ async function humanFormat(details: AgentTestResultsResponse): Promise { const resultsTable = makeSimpleTable(results, ansis.bold.blue('Test Results')); - const failedTestCases = details.testSet.testCases.filter((tc) => tc.status === 'ERROR'); + const failedTestCases = details.testSet.testCases.filter((tc) => tc.status.toLowerCase() === 'error'); const failedTestCasesObj = Object.fromEntries( Object.entries(failedTestCases).map(([, tc]) => [ `Test Case #${failedTestCases.indexOf(tc) + 1}`, tc.expectationResults - .filter((r) => r.result === 'FAIL') + .filter((r) => r.result === 'FAILURE') .map((r) => humanFriendlyName(r.name)) .join(', '), ]) @@ -345,7 +359,9 @@ async function junitFormat(results: AgentTestResultsResponse): Promise { const testCount = results.testSet.testCases.length; const failureCount = results.testSet.testCases.filter( - (tc) => ['ERROR', 'COMPLETED'].includes(tc.status) && tc.expectationResults.some((r) => r.result === 'FAIL') + (tc) => + ['error', 'completed'].includes(tc.status.toLowerCase()) && + tc.expectationResults.some((r) => r.result === 'FAILURE') ).length; const time = results.testSet.testCases.reduce((acc, tc) => { if (tc.endTime && tc.startTime) { @@ -376,7 +392,7 @@ async function junitFormat(results: AgentTestResultsResponse): Promise { $assertions: testCase.expectationResults.length, failure: testCase.expectationResults .map((r) => { - if (r.result === 'FAIL') { + if (r.result === 'FAILURE') { return { $message: r.errorMessage ?? 'Unknown error', $name: r.name }; } }) diff --git a/src/maybe-mock.ts b/src/maybe-mock.ts index 1f8d053..602d515 100644 --- a/src/maybe-mock.ts +++ b/src/maybe-mock.ts @@ -164,7 +164,7 @@ export class MaybeMock { this.logger.debug(`Making ${method} request to ${url}`); switch (method) { case 'GET': - return this.connection.requestGet(url, { retry: { maxRetries: 3 } }); + return this.connection.requestGet(url, { retry: { maxRetries: 10 } }); case 'POST': if (!body) { throw SfError.create({ diff --git a/test/agentTester.test.ts b/test/agentTester.test.ts index c8ed309..1a8ac4b 100644 --- a/test/agentTester.test.ts +++ b/test/agentTester.test.ts @@ -45,7 +45,7 @@ describe('AgentTester', () => { const output = await tester.status('4KBSM000000003F4AQ'); expect(output).to.be.ok; expect(output).to.deep.equal({ - status: 'IN_PROGRESS', + status: 'InProgress', startTime: '2024-11-13T15:00:00.000Z', }); }); @@ -98,7 +98,7 @@ describe('junit formatter', () => { const output = await convertTestResultsToFormat(input, 'junit'); expect(output).to.deep.equal(` - + diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json index daf2bbc..58716da 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json @@ -1,4 +1,4 @@ { - "status": "IN_PROGRESS", + "status": "InProgress", "startTime": "2024-11-13T15:00:00.000Z" } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json index daf2bbc..58716da 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json @@ -1,4 +1,4 @@ { - "status": "IN_PROGRESS", + "status": "InProgress", "startTime": "2024-11-13T15:00:00.000Z" } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json index d4f6503..88bd062 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json @@ -1,4 +1,4 @@ { - "status": "COMPLETED", + "status": "Completed", "startTime": "2024-11-13T15:00:00.000Z" } diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json index 4e00471..704b480 100644 --- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json +++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json @@ -1,5 +1,5 @@ { - "status": "COMPLETED", + "status": "Completed", "startTime": "2024-11-28T12:00:00Z", "endTime": "2024-11-28T12:00:48.56Z", "errorMessage": null, @@ -98,7 +98,7 @@ "actualValue": "[\"IdentifyRecordByName\",\"QueryRecords\"]", "expectedValue": "[\"IdentifyRecordByName\",\"QueryRecords\",\"GetActivitiesTimeline\"]", "score": 0.5, - "result": "FAIL", + "result": "FAILURE", "metricLabel": "Precision", "metricExplainability": "Measures the precision of the result.", "status": "Completed", @@ -112,7 +112,7 @@ "actualValue": "It looks like I am unable to find the information you are looking for due to access restrictions. How else can I assist you?", "expectedValue": "Summary of open cases and activities associated with timeline", "score": 0.1, - "result": "FAIL", + "result": "FAILURE", "metricLabel": "Precision", "metricExplainability": "Measures the precision of the result.", "status": "Completed", From 470fa525cba2a32c78f857b652edc3b1feabc47b Mon Sep 17 00:00:00 2001 From: Mike Donnalley Date: Fri, 17 Jan 2025 13:23:51 -0700 Subject: [PATCH 6/6] chore: remove console logs --- src/agentTester.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/agentTester.ts b/src/agentTester.ts index da8fefe..dcfdc79 100644 --- a/src/agentTester.ts +++ b/src/agentTester.ts @@ -122,14 +122,6 @@ export class AgentTester { const client = await PollingClient.create({ poll: async (): Promise => { const statusResponse = await this.status(jobId); - // eslint-disable-next-line no-console - console.log('*'.repeat(process.stdout.columns)); - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-var-requires - const util = require('node:util'); - // eslint-disable-next-line no-console, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call - console.log(util.inspect(statusResponse, { depth: 6 })); - // eslint-disable-next-line no-console - console.log('*'.repeat(process.stdout.columns)); if (statusResponse.status.toLowerCase() !== 'new') { const resultsResponse = await this.results(jobId); const totalTestCases = resultsResponse.testSet.testCases.length;