diff --git a/ai-support.json b/ai-support.json
index 16f7cb6825..5bc70a441d 100644
--- a/ai-support.json
+++ b/ai-support.json
@@ -64,7 +64,19 @@
       }
     ]
   },
-
+  {
+    "name": "Meta Llama3",
+    "features": [
+      {
+        "title": "Text",
+        "supported": true
+      },
+      {
+        "title": "Image",
+        "supported": false
+      }
+    ]
+  },
   {
     "name": "Amazon Titan",
     "features": [
diff --git a/lib/llm-events/aws-bedrock/bedrock-command.js b/lib/llm-events/aws-bedrock/bedrock-command.js
index ea621aa4ae..937b48c2c0 100644
--- a/lib/llm-events/aws-bedrock/bedrock-command.js
+++ b/lib/llm-events/aws-bedrock/bedrock-command.js
@@ -37,7 +37,7 @@ class BedrockCommand {
       result = this.#body.max_tokens_to_sample
     } else if (this.isClaude3() === true || this.isCohere() === true) {
       result = this.#body.max_tokens
-    } else if (this.isLlama2() === true) {
+    } else if (this.isLlama() === true) {
       result = this.#body.max_gen_length
     } else if (this.isTitan() === true) {
       result = this.#body.textGenerationConfig?.maxTokenCount
@@ -80,7 +80,7 @@ class BedrockCommand {
       this.isClaude() === true ||
       this.isAi21() === true ||
       this.isCohere() === true ||
-      this.isLlama2() === true
+      this.isLlama() === true
     ) {
       result = this.#body.prompt
     } else if (this.isClaude3() === true) {
@@ -104,7 +104,7 @@ class BedrockCommand {
       this.isClaude3() === true ||
       this.isAi21() === true ||
       this.isCohere() === true ||
-      this.isLlama2() === true
+      this.isLlama() === true
     ) {
       result = this.#body.temperature
     }
@@ -131,8 +131,8 @@ class BedrockCommand {
     return this.#modelId.startsWith('cohere.embed')
   }
 
-  isLlama2() {
-    return this.#modelId.startsWith('meta.llama2')
+  isLlama() {
+    return this.#modelId.startsWith('meta.llama')
   }
 
   isTitan() {
diff --git a/lib/llm-events/aws-bedrock/bedrock-response.js b/lib/llm-events/aws-bedrock/bedrock-response.js
index 5f0354ac97..0d5ec61319 100644
--- a/lib/llm-events/aws-bedrock/bedrock-response.js
+++ b/lib/llm-events/aws-bedrock/bedrock-response.js
@@ -70,7 +70,7 @@ class BedrockResponse {
     } else if (cmd.isCohere() === true) {
       this.#completions = body.generations?.map((g) => g.text) ?? []
       this.#id = body.id
-    } else if (cmd.isLlama2() === true) {
+    } else if (cmd.isLlama() === true) {
       body.generation && this.#completions.push(body.generation)
     } else if (cmd.isTitan() === true) {
       this.#completions = body.results?.map((r) => r.outputText) ?? []
@@ -107,7 +107,7 @@ class BedrockResponse {
       result = this.#parsedBody.stop_reason
     } else if (cmd.isCohere() === true) {
       result = this.#parsedBody.generations?.find((r) => r.finish_reason !== null)?.finish_reason
-    } else if (cmd.isLlama2() === true) {
+    } else if (cmd.isLlama() === true) {
       result = this.#parsedBody.stop_reason
     } else if (cmd.isTitan() === true) {
       result = this.#parsedBody.results?.find((r) => r.completionReason !== null)?.completionReason
diff --git a/lib/llm-events/aws-bedrock/stream-handler.js b/lib/llm-events/aws-bedrock/stream-handler.js
index e9f00c1cf6..9eaf9b70cc 100644
--- a/lib/llm-events/aws-bedrock/stream-handler.js
+++ b/lib/llm-events/aws-bedrock/stream-handler.js
@@ -114,9 +114,9 @@ class StreamHandler {
     } else if (bedrockCommand.isCohereEmbed() === true) {
       this.stopReasonKey = 'nr_none'
       this.generator = handleCohereEmbed
-    } else if (bedrockCommand.isLlama2() === true) {
+    } else if (bedrockCommand.isLlama() === true) {
       this.stopReasonKey = 'stop_reason'
-      this.generator = handleLlama2
+      this.generator = handleLlama
     } else if (bedrockCommand.isTitan() === true) {
       this.stopReasonKey = 'completionReason'
       this.generator = handleTitan
@@ -271,7 +271,7 @@ async function* handleCohereEmbed() {
   }
 }
 
-async function* handleLlama2() {
+async function* handleLlama() {
   let currentBody = {}
   let generation = ''
 
diff --git a/test/lib/aws-server-stubs/ai-server/index.js b/test/lib/aws-server-stubs/ai-server/index.js
index 78bf442ef3..31c65a0dc2 100644
--- a/test/lib/aws-server-stubs/ai-server/index.js
+++ b/test/lib/aws-server-stubs/ai-server/index.js
@@ -114,8 +114,11 @@ function handler(req, res) {
     }
 
     case 'meta.llama2-13b-chat-v1':
-    case 'meta.llama2-70b-chat-v1': {
-      response = responses.llama2.get(payload.prompt)
+    case 'meta.llama2-70b-chat-v1':
+    // llama3 responses are identical, just return llama2 data
+    case 'meta.llama3-8b-instruct-v1:0':
+    case 'meta.llama3-70b-instruct-v1:0': {
+      response = responses.llama.get(payload.prompt)
       break
     }
 
diff --git a/test/lib/aws-server-stubs/ai-server/responses/index.js b/test/lib/aws-server-stubs/ai-server/responses/index.js
index f9ad466d37..78183597f2 100644
--- a/test/lib/aws-server-stubs/ai-server/responses/index.js
+++ b/test/lib/aws-server-stubs/ai-server/responses/index.js
@@ -10,7 +10,7 @@ const amazon = require('./amazon')
 const claude = require('./claude')
 const claude3 = require('./claude3')
 const cohere = require('./cohere')
-const llama2 = require('./llama2')
+const llama = require('./llama')
 
 module.exports = {
   ai21,
@@ -18,5 +18,5 @@ module.exports = {
   claude,
   claude3,
   cohere,
-  llama2
+  llama
 }
diff --git a/test/lib/aws-server-stubs/ai-server/responses/llama2.js b/test/lib/aws-server-stubs/ai-server/responses/llama.js
similarity index 92%
rename from test/lib/aws-server-stubs/ai-server/responses/llama2.js
rename to test/lib/aws-server-stubs/ai-server/responses/llama.js
index b2ddcfb8ca..cf5792af4d 100644
--- a/test/lib/aws-server-stubs/ai-server/responses/llama2.js
+++ b/test/lib/aws-server-stubs/ai-server/responses/llama.js
@@ -8,7 +8,7 @@ const responses = new Map()
 
 const { contentType, reqId } = require('./constants')
 
-responses.set('text llama2 ultimate question', {
+responses.set('text llama ultimate question', {
   headers: {
     'content-type': contentType,
     'x-amzn-requestid': reqId,
@@ -25,7 +25,7 @@ responses.set('text llama2 ultimate question', {
   }
 })
 
-responses.set('text llama2 ultimate question streamed', {
+responses.set('text llama ultimate question streamed', {
   headers: {
     'content-type': 'application/vnd.amazon.eventstream',
     'x-amzn-requestid': reqId,
@@ -68,7 +68,7 @@ responses.set('text llama2 ultimate question streamed', {
   ]
 })
 
-responses.set('text llama2 ultimate question error', {
+responses.set('text llama ultimate question error', {
   headers: {
     'content-type': contentType,
     'x-amzn-requestid': reqId,
diff --git a/test/unit/llm-events/aws-bedrock/bedrock-command.test.js b/test/unit/llm-events/aws-bedrock/bedrock-command.test.js
index 8d07461078..e19da844aa 100644
--- a/test/unit/llm-events/aws-bedrock/bedrock-command.test.js
+++ b/test/unit/llm-events/aws-bedrock/bedrock-command.test.js
@@ -52,6 +52,13 @@ const llama2 = {
   }
 }
 
+const llama3 = {
+  modelId: 'meta.llama3-8b-instruct-v1:0',
+  body: {
+    prompt: 'who are you'
+  }
+}
+
 const titan = {
   modelId: 'amazon.titan-text-lite-v1',
   body: {
@@ -85,7 +92,7 @@ tap.test('non-conforming command is handled gracefully', async (t) => {
     'Claude3',
     'Cohere',
     'CohereEmbed',
-    'Llama2',
+    'Llama',
     'Titan',
     'TitanEmbed'
   ]) {
@@ -212,7 +219,7 @@ tap.test('cohere embed minimal command works', async (t) => {
 tap.test('llama2 minimal command works', async (t) => {
   t.context.updatePayload(structuredClone(llama2))
   const cmd = new BedrockCommand(t.context.input)
-  t.equal(cmd.isLlama2(), true)
+  t.equal(cmd.isLlama(), true)
   t.equal(cmd.maxTokens, undefined)
   t.equal(cmd.modelId, llama2.modelId)
   t.equal(cmd.modelType, 'completion')
@@ -226,7 +233,32 @@ tap.test('llama2 complete command works', async (t) => {
   payload.body.temperature = 0.5
   t.context.updatePayload(payload)
   const cmd = new BedrockCommand(t.context.input)
-  t.equal(cmd.isLlama2(), true)
+  t.equal(cmd.isLlama(), true)
+  t.equal(cmd.maxTokens, 25)
+  t.equal(cmd.modelId, payload.modelId)
+  t.equal(cmd.modelType, 'completion')
+  t.equal(cmd.prompt, payload.body.prompt)
+  t.equal(cmd.temperature, payload.body.temperature)
+})
+
+tap.test('llama3 minimal command works', async (t) => {
+  t.context.updatePayload(structuredClone(llama3))
+  const cmd = new BedrockCommand(t.context.input)
+  t.equal(cmd.isLlama(), true)
+  t.equal(cmd.maxTokens, undefined)
+  t.equal(cmd.modelId, llama3.modelId)
+  t.equal(cmd.modelType, 'completion')
+  t.equal(cmd.prompt, llama3.body.prompt)
+  t.equal(cmd.temperature, undefined)
+})
+
+tap.test('llama3 complete command works', async (t) => {
+  const payload = structuredClone(llama3)
+  payload.body.max_gen_length = 25
+  payload.body.temperature = 0.5
+  t.context.updatePayload(payload)
+  const cmd = new BedrockCommand(t.context.input)
+  t.equal(cmd.isLlama(), true)
   t.equal(cmd.maxTokens, 25)
   t.equal(cmd.modelId, payload.modelId)
   t.equal(cmd.modelType, 'completion')
diff --git a/test/unit/llm-events/aws-bedrock/bedrock-response.test.js b/test/unit/llm-events/aws-bedrock/bedrock-response.test.js
index a0a5106d7e..e2a6cdb976 100644
--- a/test/unit/llm-events/aws-bedrock/bedrock-response.test.js
+++ b/test/unit/llm-events/aws-bedrock/bedrock-response.test.js
@@ -38,8 +38,8 @@ const cohere = {
   ]
 }
 
-const llama2 = {
-  generation: 'llama2-response',
+const llama = {
+  generation: 'llama-response',
   stop_reason: 'done'
 }
 
@@ -79,7 +79,7 @@ tap.beforeEach((t) => {
     isCohere() {
       return false
     },
-    isLlama2() {
+    isLlama() {
       return false
     },
     isTitan() {
@@ -172,8 +172,8 @@ tap.test('cohere complete responses work', async (t) => {
   t.equal(res.statusCode, 200)
 })
 
-tap.test('llama2 malformed responses work', async (t) => {
-  t.context.bedrockCommand.isLlama2 = () => true
+tap.test('llama malformed responses work', async (t) => {
+  t.context.bedrockCommand.isLlama = () => true
   const res = new BedrockResponse(t.context)
   t.same(res.completions, [])
   t.equal(res.finishReason, undefined)
@@ -183,11 +183,11 @@ tap.test('llama2 malformed responses work', async (t) => {
   t.equal(res.statusCode, 200)
 })
 
-tap.test('llama2 complete responses work', async (t) => {
-  t.context.bedrockCommand.isLlama2 = () => true
-  t.context.updatePayload(structuredClone(llama2))
+tap.test('llama complete responses work', async (t) => {
+  t.context.bedrockCommand.isLlama = () => true
+  t.context.updatePayload(structuredClone(llama))
   const res = new BedrockResponse(t.context)
-  t.same(res.completions, ['llama2-response'])
+  t.same(res.completions, ['llama-response'])
   t.equal(res.finishReason, 'done')
   t.same(res.headers, t.context.response.response.headers)
   t.equal(res.id, undefined)
diff --git a/test/unit/llm-events/aws-bedrock/stream-handler.test.js b/test/unit/llm-events/aws-bedrock/stream-handler.test.js
index 613a0bbf29..2d892178ec 100644
--- a/test/unit/llm-events/aws-bedrock/stream-handler.test.js
+++ b/test/unit/llm-events/aws-bedrock/stream-handler.test.js
@@ -45,7 +45,7 @@ tap.beforeEach((t) => {
     isClaude3() {
       return false
     },
-    isLlama2() {
+    isLlama() {
       return false
     },
     isTitan() {
@@ -242,15 +242,15 @@ tap.test('handles cohere embedding streams', async (t) => {
   t.equal(br.statusCode, 200)
 })
 
-tap.test('handles llama2 streams', async (t) => {
-  t.context.passThroughParams.bedrockCommand.isLlama2 = () => true
+tap.test('handles llama streams', async (t) => {
+  t.context.passThroughParams.bedrockCommand.isLlama = () => true
   t.context.chunks = [
     { generation: '1', stop_reason: null },
     { generation: '2', stop_reason: 'done', ...t.context.metrics }
  ]
   const handler = new StreamHandler(t.context)
 
-  t.equal(handler.generator.name, 'handleLlama2')
+  t.equal(handler.generator.name, 'handleLlama')
   for await (const event of handler.generator()) {
     t.type(event.chunk.bytes, Uint8Array)
   }
@@ -267,7 +267,7 @@ tap.test('handles llama2 streams', async (t) => {
 })
 
   const bc = new BedrockCommand({
-    modelId: 'meta.llama2',
+    modelId: 'meta.llama',
     body: JSON.stringify({
      prompt: 'prompt',
       max_gen_length: 5
diff --git a/test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js b/test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js
index eb179cc496..61b2bc9b42 100644
--- a/test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js
+++ b/test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js
@@ -48,7 +48,7 @@ const requests = {
     body: JSON.stringify({ prompt, temperature: 0.5, max_tokens: 100 }),
     modelId
   }),
-  llama2: (prompt, modelId) => ({
+  llama: (prompt, modelId) => ({
     body: JSON.stringify({ prompt, max_gen_length: 100, temperature: 0.5 }),
     modelId
   })
@@ -98,7 +98,8 @@ tap.afterEach(async (t) => {
   { modelId: 'anthropic.claude-v2', resKey: 'claude' },
   { modelId: 'anthropic.claude-3-haiku-20240307-v1:0', resKey: 'claude3' },
   { modelId: 'cohere.command-text-v14', resKey: 'cohere' },
-  { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' }
+  { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama' },
+  { modelId: 'meta.llama3-8b-instruct-v1:0', resKey: 'llama' }
 ].forEach(({ modelId, resKey }) => {
   tap.test(`${modelId}: should properly create completion segment`, (t) => {
     const { bedrock, client, responses, agent, expectedExternalPath } = t.context