From c8ab3e444002812c22cfe72e31b86faf7ca6e568 Mon Sep 17 00:00:00 2001
From: Sam Brenner <106700075+sabrenner@users.noreply.github.com>
Date: Wed, 20 Nov 2024 11:12:48 -0500
Subject: [PATCH] [MLOB-1804] feat(langchain): add langchain instrumentation
 (#4860)

* wip

* wip

* first pass at chain invoke and chat,llm generate

* add langchain openai embeddings

* add batch call

* change api key logic

* testing

* ts def changes

* codeowners changes

* add clarifying issue as reason for skipping esm tests

* fix langchain patching for possible esm files vs commonjs files, namespace

* configurable truncation and prompt completion sampling

* remove unneeded util file

* remove some unneeded code

* fix patching esm vs cjs issues

* json stringify non-string chain outputs

* apikey, model, provider should no-op by default

* add some token handling logic

* review comments

* check lc_ for ignored properties
---
 .github/workflows/plugins.yml                 |   8 +
 CODEOWNERS                                    |   2 +
 docs/test.ts                                  |   1 +
 index.d.ts                                    |   7 +
 .../src/helpers/hooks.js                      |   3 +
 .../datadog-instrumentations/src/langchain.js |  77 ++
 .../src/handlers/chain.js                     |  50 +
 .../src/handlers/default.js                   |  53 ++
 .../src/handlers/embedding.js                 |  63 ++
 .../handlers/language_models/chat_model.js    |  99 ++
 .../src/handlers/language_models/index.js     |  48 +
 .../src/handlers/language_models/llm.js       |  57 ++
 .../datadog-plugin-langchain/src/index.js     |  89 ++
 .../datadog-plugin-langchain/src/tokens.js    |  35 +
 .../test/index.spec.js                        | 878 ++++++++++++++++++
 .../test/integration-test/client.spec.js      |  55 ++
 .../test/integration-test/server.mjs          |  18 +
 packages/dd-trace/src/config.js               |   8 +
 packages/dd-trace/src/plugins/index.js        |   3 +
 packages/dd-trace/test/config.spec.js         |   8 +-
 packages/dd-trace/test/plugins/externals.json |   6 +
 21 files changed, 1567 insertions(+), 1 deletion(-)
 create mode 100644 packages/datadog-instrumentations/src/langchain.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/chain.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/default.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/embedding.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/language_models/chat_model.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/language_models/index.js
 create mode 100644 packages/datadog-plugin-langchain/src/handlers/language_models/llm.js
 create mode 100644 packages/datadog-plugin-langchain/src/index.js
 create mode 100644 packages/datadog-plugin-langchain/src/tokens.js
 create mode 100644 packages/datadog-plugin-langchain/test/index.spec.js
 create mode 100644 packages/datadog-plugin-langchain/test/integration-test/client.spec.js
 create mode 100644 packages/datadog-plugin-langchain/test/integration-test/server.mjs

diff --git a/.github/workflows/plugins.yml b/.github/workflows/plugins.yml
index 0e067a98fb5..9ba9daa9277 100644
--- a/.github/workflows/plugins.yml
+++ b/.github/workflows/plugins.yml
@@ -561,6 +561,14 @@ jobs:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/plugins/test-and-upstream
 
+  langchain:
+    runs-on: ubuntu-latest
+    env:
+      PLUGINS: langchain
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/plugins/test
+
   limitd-client:
     runs-on: ubuntu-latest
     services:
diff --git a/CODEOWNERS b/CODEOWNERS
index 3b45215923f..52963649952 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -56,7 +56,9 @@
 /packages/dd-trace/src/llmobs/ @DataDog/ml-observability
 /packages/dd-trace/test/llmobs/ @DataDog/ml-observability
 /packages/datadog-plugin-openai/ @DataDog/ml-observability
+/packages/datadog-plugin-langchain/ @DataDog/ml-observability
 /packages/datadog-instrumentations/src/openai.js @DataDog/ml-observability
+/packages/datadog-instrumentations/src/langchain.js @DataDog/ml-observability
 
 # CI
 /.github/workflows/appsec.yml @DataDog/asm-js
diff --git a/docs/test.ts b/docs/test.ts
index 8991c8680a5..479b4620b4d 100644
--- a/docs/test.ts
+++ b/docs/test.ts
@@ -342,6 +342,7 @@ tracer.use('kafkajs');
 tracer.use('knex');
 tracer.use('koa');
 tracer.use('koa', httpServerOptions);
+tracer.use('langchain');
 tracer.use('mariadb', { service: () => `my-custom-mariadb` })
 tracer.use('memcached');
 tracer.use('microgateway-core');
diff --git a/index.d.ts b/index.d.ts
index f8d4679c570..9b4becec957 100644
--- a/index.d.ts
+++ b/index.d.ts
@@ -179,6 +179,7 @@ interface Plugins {
   "kafkajs": tracer.plugins.kafkajs
   "knex": tracer.plugins.knex;
   "koa": tracer.plugins.koa;
+  "langchain": tracer.plugins.langchain;
   "mariadb": tracer.plugins.mariadb;
   "memcached": tracer.plugins.memcached;
   "microgateway-core": tracer.plugins.microgateway_core;
@@ -1592,6 +1593,12 @@ declare namespace tracer {
    */
   interface kafkajs extends Instrumentation {}
 
+  /**
+   * This plugin automatically instruments the
+   * [langchain](https://js.langchain.com/) module
+   */
+  interface langchain extends Instrumentation {}
+
   /**
    * This plugin automatically instruments the
    * [ldapjs](https://github.com/ldapjs/node-ldapjs/) module.
diff --git a/packages/datadog-instrumentations/src/helpers/hooks.js b/packages/datadog-instrumentations/src/helpers/hooks.js
index 948d3c5fe28..4261d4dae44 100644
--- a/packages/datadog-instrumentations/src/helpers/hooks.js
+++ b/packages/datadog-instrumentations/src/helpers/hooks.js
@@ -19,6 +19,8 @@ module.exports = {
   '@jest/test-sequencer': () => require('../jest'),
   '@jest/transform': () => require('../jest'),
   '@koa/router': () => require('../koa'),
+  '@langchain/core': () => require('../langchain'),
+  '@langchain/openai': () => require('../langchain'),
   '@node-redis/client': () => require('../redis'),
   '@opensearch-project/opensearch': () => require('../opensearch'),
   '@opentelemetry/sdk-trace-node': () => require('../otel-sdk-trace'),
@@ -67,6 +69,7 @@ module.exports = {
   koa: () => require('../koa'),
   'koa-router': () => require('../koa'),
   kafkajs: () => require('../kafkajs'),
+  langchain: () => require('../langchain'),
   ldapjs: () => require('../ldapjs'),
   'limitd-client': () => require('../limitd-client'),
   lodash: () => require('../lodash'),
diff --git a/packages/datadog-instrumentations/src/langchain.js b/packages/datadog-instrumentations/src/langchain.js
new file mode 100644
index 00000000000..6b9321c5ab5
--- /dev/null
+++ b/packages/datadog-instrumentations/src/langchain.js
@@ -0,0 +1,77 @@
+'use strict'
+
+const { addHook } = require('./helpers/instrument')
+const shimmer = require('../../datadog-shimmer')
+
+const tracingChannel = require('dc-polyfill').tracingChannel
+
+const invokeTracingChannel = tracingChannel('apm:langchain:invoke')
+
+function wrapLangChainPromise (fn, type, namespace = []) {
+  return function () {
+    if (!invokeTracingChannel.start.hasSubscribers) {
+      return fn.apply(this, arguments)
+    }
+
+    // Runnable interfaces have an `lc_namespace` property
+    const ns = this.lc_namespace || namespace
+    const resource = [...ns, this.constructor.name].join('.')
+
+    const ctx = {
+      args: arguments,
+      instance: this,
+      type,
+      resource
+    }
+
+    return invokeTracingChannel.tracePromise(fn, ctx, this, ...arguments)
+  }
+}
+
+// langchain compiles into ESM and CommonJS, with ESM being the default and landing in the `.js` files
+// however, the CommonJS build ends up in `.cjs` files, which are what gets required under the hood
+// we patch each separately and explicitly to match against exports only once, rather than relying on file regex matching
+const extensions = ['js', 'cjs']
+
+for (const extension of extensions) {
+  addHook({ name: '@langchain/core', file: `dist/runnables/base.${extension}`, versions: ['>=0.1'] }, exports => {
+    const RunnableSequence = exports.RunnableSequence
+    shimmer.wrap(RunnableSequence.prototype, 'invoke', invoke => wrapLangChainPromise(invoke, 'chain'))
+    shimmer.wrap(RunnableSequence.prototype, 'batch', batch => wrapLangChainPromise(batch, 'chain'))
+    return exports
+  })
+
+  addHook({
+    name: '@langchain/core',
+    file: `dist/language_models/chat_models.${extension}`,
+    versions: ['>=0.1']
+  }, exports => {
+    const BaseChatModel = exports.BaseChatModel
+    shimmer.wrap(
+      BaseChatModel.prototype,
+      'generate',
+      generate => wrapLangChainPromise(generate, 'chat_model')
+    )
+    return exports
+  })
+
+  addHook({ name: '@langchain/core', file: `dist/language_models/llms.${extension}`, versions: ['>=0.1'] }, exports => {
+    const BaseLLM = exports.BaseLLM
+    shimmer.wrap(BaseLLM.prototype, 'generate', generate => wrapLangChainPromise(generate, 'llm'))
+    return exports
+  })
+
+  addHook({ name: '@langchain/openai', file: `dist/embeddings.${extension}`, versions: ['>=0.1'] }, exports => {
+    const OpenAIEmbeddings = exports.OpenAIEmbeddings
+
+    // OpenAI (and Embeddings in general) do not define an lc_namespace
+    const namespace = ['langchain', 'embeddings', 'openai']
+    shimmer.wrap(OpenAIEmbeddings.prototype, 'embedDocuments', embedDocuments =>
+      wrapLangChainPromise(embedDocuments, 'embedding', namespace)
+    )
+    shimmer.wrap(OpenAIEmbeddings.prototype, 'embedQuery', embedQuery =>
+      wrapLangChainPromise(embedQuery, 'embedding', namespace)
+    )
+    return exports
+  })
+}
diff --git a/packages/datadog-plugin-langchain/src/handlers/chain.js b/packages/datadog-plugin-langchain/src/handlers/chain.js
new file mode 100644
index 00000000000..81374587cc6
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/chain.js
@@ -0,0 +1,50 @@
+'use strict'
+
+const LangChainHandler = require('./default')
+
+class LangChainChainHandler extends LangChainHandler {
+  getSpanStartTags (ctx) {
+    const tags = {}
+
+    if (!this.isPromptCompletionSampled()) return tags
+
+    let inputs = ctx.args?.[0]
+    inputs = Array.isArray(inputs) ? inputs : [inputs]
+
+    for (const idx in inputs) {
+      const input = inputs[idx]
+      if (typeof input !== 'object') {
+        tags[`langchain.request.inputs.${idx}`] = this.normalize(input)
+      } else {
+        for (const [key, value] of Object.entries(input)) {
+          // these are mappings to the python client names, ie lc_kwargs
+          // only present on BaseMessage types
+          if (key.includes('lc_')) continue
+          tags[`langchain.request.inputs.${idx}.${key}`] = this.normalize(value)
+        }
+      }
+    }
+
+    return tags
+  }
+
+  getSpanEndTags (ctx) {
+    const tags = {}
+
+    if (!this.isPromptCompletionSampled()) return tags
+
+    let outputs = ctx.result
+    outputs = Array.isArray(outputs) ? outputs : [outputs]
+
+    for (const idx in outputs) {
+      const output = outputs[idx]
+      tags[`langchain.response.outputs.${idx}`] = this.normalize(
+        typeof output === 'string' ? output : JSON.stringify(output)
+      )
+    }
+
+    return tags
+  }
+}
+
+module.exports = LangChainChainHandler
diff --git a/packages/datadog-plugin-langchain/src/handlers/default.js b/packages/datadog-plugin-langchain/src/handlers/default.js
new file mode 100644
index 00000000000..103f7c1f98d
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/default.js
@@ -0,0 +1,53 @@
+'use strict'
+
+const Sampler = require('../../../dd-trace/src/sampler')
+
+const RE_NEWLINE = /\n/g
+const RE_TAB = /\t/g
+
+// TODO: should probably refactor the OpenAI integration to use a shared LLMTracingPlugin base class
+// This logic isn't particular to LangChain
+class LangChainHandler {
+  constructor (config) {
+    this.config = config
+    this.sampler = new Sampler(config.spanPromptCompletionSampleRate)
+  }
+
+  // no-op for default handler
+  getSpanStartTags (ctx) {}
+
+  // no-op for default handler
+  getSpanEndTags (ctx) {}
+
+  // no-op for default handler
+  extractApiKey (instance) {}
+
+  // no-op for default handler
+  extractProvider (instance) {}
+
+  // no-op for default handler
+  extractModel (instance) {}
+
+  normalize (text) {
+    if (!text) return
+    if (typeof text !== 'string') return
+
+    const max = this.config.spanCharLimit
+
+    text = text
+      .replace(RE_NEWLINE, '\\n')
+      .replace(RE_TAB, '\\t')
+
+    if (text.length > max) {
+      return text.substring(0, max) + '...'
+    }
+
+    return text
+  }
+
+  isPromptCompletionSampled () {
+    return this.sampler.isSampled()
+  }
+}
+
+module.exports = LangChainHandler
diff --git a/packages/datadog-plugin-langchain/src/handlers/embedding.js b/packages/datadog-plugin-langchain/src/handlers/embedding.js
new file mode 100644
index 00000000000..aa37825b2d8
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/embedding.js
@@ -0,0 +1,63 @@
+'use strict'
+
+const LangChainHandler = require('./default')
+
+class LangChainEmbeddingHandler extends LangChainHandler {
+  getSpanStartTags (ctx) {
+    const tags = {}
+
+    const inputTexts = ctx.args?.[0]
+
+    const sampled = this.isPromptCompletionSampled()
+    if (typeof inputTexts === 'string') {
+      // embed query
+      if (sampled) {
+        tags['langchain.request.inputs.0.text'] = this.normalize(inputTexts)
+      }
+      tags['langchain.request.input_counts'] = 1
+    } else {
+      // embed documents
+      if (sampled) {
+        for (const idx in inputTexts) {
+          const inputText = inputTexts[idx]
+          tags[`langchain.request.inputs.${idx}.text`] = this.normalize(inputText)
+        }
+      }
+      tags['langchain.request.input_counts'] = inputTexts.length
+    }
+
+    return tags
+  }
+
+  getSpanEndTags (ctx) {
+    const tags = {}
+
+    const { result } = ctx
+    if (!Array.isArray(result)) return
+
+    tags['langchain.response.outputs.embedding_length'] = (
+      Array.isArray(result[0]) ? result[0] : result
+    ).length
+
+    return tags
+  }
+
+  extractApiKey (instance) {
+    const apiKey = instance.clientConfig?.apiKey
+    if (!apiKey || apiKey.length < 4) return ''
+    return `...${apiKey.slice(-4)}`
+  }
+
+  extractProvider (instance) {
+    return instance.constructor.name.split('Embeddings')[0].toLowerCase()
+  }
+
+  extractModel (instance) {
+    for (const attr of ['model', 'modelName', 'modelId', 'modelKey', 'repoId']) {
+      const modelName = instance[attr]
+      if (modelName) return modelName
+    }
+  }
+}
+
+module.exports = LangChainEmbeddingHandler
diff --git a/packages/datadog-plugin-langchain/src/handlers/language_models/chat_model.js b/packages/datadog-plugin-langchain/src/handlers/language_models/chat_model.js
new file mode 100644
index 00000000000..681e5deb050
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/language_models/chat_model.js
@@ -0,0 +1,99 @@
+'use strict'
+
+const LangChainLanguageModelHandler = require('.')
+
+const COMPLETIONS = 'langchain.response.completions'
+
+class LangChainChatModelHandler extends LangChainLanguageModelHandler {
+  getSpanStartTags (ctx, provider) {
+    const tags = {}
+
+    const inputs = ctx.args?.[0]
+
+    for (const messageSetIndex in inputs) {
+      const messageSet = inputs[messageSetIndex]
+
+      for (const messageIndex in messageSet) {
+        const message = messageSet[messageIndex]
+        if (this.isPromptCompletionSampled()) {
+          tags[`langchain.request.messages.${messageSetIndex}.${messageIndex}.content`] =
+            this.normalize(message.content) || ''
+        }
+        tags[`langchain.request.messages.${messageSetIndex}.${messageIndex}.message_type`] = message.constructor.name
+      }
+    }
+
+    const instance = ctx.instance
+    const identifyingParams = (typeof instance._identifyingParams === 'function' && instance._identifyingParams()) || {}
+    for (const [param, val] of Object.entries(identifyingParams)) {
+      if (param.toLowerCase().includes('apikey') || param.toLowerCase().includes('apitoken')) continue
+      if (typeof val === 'object') {
+        for (const [key, value] of Object.entries(val)) {
+          tags[`langchain.request.${provider}.parameters.${param}.${key}`] = value
+        }
+      } else {
+        tags[`langchain.request.${provider}.parameters.${param}`] = val
+      }
+    }
+
+    return tags
+  }
+
+  getSpanEndTags (ctx) {
+    const { result } = ctx
+
+    const tags = {}
+
+    this.extractTokenMetrics(ctx.currentStore?.span, result)
+
+    for (const messageSetIdx in result.generations) {
+      const messageSet = result.generations[messageSetIdx]
+
+      for (const chatCompletionIdx in messageSet) {
+        const chatCompletion = messageSet[chatCompletionIdx]
+
+        const text = chatCompletion.text
+        const message = chatCompletion.message
+        let toolCalls = message.tool_calls
+
+        if (text && this.isPromptCompletionSampled()) {
+          tags[
+            `${COMPLETIONS}.${messageSetIdx}.${chatCompletionIdx}.content`
+          ] = this.normalize(text)
+        }
+
+        tags[
+          `${COMPLETIONS}.${messageSetIdx}.${chatCompletionIdx}.message_type`
+        ] = message.constructor.name
+
+        if (toolCalls) {
+          if (!Array.isArray(toolCalls)) {
+            toolCalls = [toolCalls]
+          }
+
+          for (const toolCallIndex in toolCalls) {
+            const toolCall = toolCalls[toolCallIndex]
+
+            tags[
+              `${COMPLETIONS}.${messageSetIdx}.${chatCompletionIdx}.tool_calls.${toolCallIndex}.id`
+            ] = toolCall.id
+            tags[
+              `${COMPLETIONS}.${messageSetIdx}.${chatCompletionIdx}.tool_calls.${toolCallIndex}.name`
+            ] = toolCall.name
+
+            const args = toolCall.args || {}
+            for (const [name, value] of Object.entries(args)) {
+              tags[
+                `${COMPLETIONS}.${messageSetIdx}.${chatCompletionIdx}.tool_calls.${toolCallIndex}.args.${name}`
+              ] = this.normalize(value)
+            }
+          }
+        }
+      }
+    }
+
+    return tags
+  }
+}
+
+module.exports = LangChainChatModelHandler
diff --git a/packages/datadog-plugin-langchain/src/handlers/language_models/index.js b/packages/datadog-plugin-langchain/src/handlers/language_models/index.js
new file mode 100644
index 00000000000..b67dfa2e2dd
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/language_models/index.js
@@ -0,0 +1,48 @@
+'use strict'
+
+const { getTokensFromLlmOutput } = require('../../tokens')
+const LangChainHandler = require('../default')
+
+class LangChainLanguageModelHandler extends LangChainHandler {
+  extractApiKey (instance) {
+    const key = Object.keys(instance)
+      .find(key => {
+        const lower = key.toLowerCase()
+        return lower.includes('apikey') || lower.includes('apitoken')
+      })
+
+    let apiKey = instance[key]
+    if (apiKey?.secretValue && typeof apiKey.secretValue === 'function') {
+      apiKey = apiKey.secretValue()
+    }
+    if (!apiKey || apiKey.length < 4) return ''
+    return `...${apiKey.slice(-4)}`
+  }
+
+  extractProvider (instance) {
+    return typeof instance._llmType === 'function' && instance._llmType().split('-')[0]
+  }
+
+  extractModel (instance) {
+    for (const attr of ['model', 'modelName', 'modelId', 'modelKey', 'repoId']) {
+      const modelName = instance[attr]
+      if (modelName) return modelName
+    }
+  }
+
+  extractTokenMetrics (span, result) {
+    if (!span || !result) return
+
+    // we do not tag token metrics for non-openai providers
+    const provider = span.context()._tags['langchain.request.provider']
+    if (provider !== 'openai') return
+
+    const tokens = getTokensFromLlmOutput(result)
+
+    for (const [tokenKey, tokenCount] of Object.entries(tokens)) {
+      span.setTag(`langchain.tokens.${tokenKey}_tokens`, tokenCount)
+    }
+  }
+}
+
+module.exports = LangChainLanguageModelHandler
diff --git a/packages/datadog-plugin-langchain/src/handlers/language_models/llm.js b/packages/datadog-plugin-langchain/src/handlers/language_models/llm.js
new file mode 100644
index 00000000000..acd4967fd8d
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/handlers/language_models/llm.js
@@ -0,0 +1,57 @@
+'use strict'
+
+const LangChainLanguageModelHandler = require('.')
+
+class LangChainLLMHandler extends LangChainLanguageModelHandler {
+  getSpanStartTags (ctx, provider) {
+    const tags = {}
+
+    const prompts = ctx.args?.[0]
+    for (const promptIdx in prompts) {
+      if (!this.isPromptCompletionSampled()) continue
+
+      const prompt = prompts[promptIdx]
+      tags[`langchain.request.prompts.${promptIdx}.content`] = this.normalize(prompt) || ''
+    }
+
+    const instance = ctx.instance
+    const identifyingParams = (typeof instance._identifyingParams === 'function' && instance._identifyingParams()) || {}
+    for (const [param, val] of Object.entries(identifyingParams)) {
+      if (param.toLowerCase().includes('apikey') || param.toLowerCase().includes('apitoken')) continue
+      if (typeof val === 'object') {
+        for (const [key, value] of Object.entries(val)) {
+          tags[`langchain.request.${provider}.parameters.${param}.${key}`] = value
+        }
+      } else {
+        tags[`langchain.request.${provider}.parameters.${param}`] = val
+      }
+    }
+
+    return tags
+  }
+
+  getSpanEndTags (ctx) {
+    const { result } = ctx
+
+    const tags = {}
+
+    this.extractTokenMetrics(ctx.currentStore?.span, result)
+
+    for (const completionIdx in result.generations) {
+      const completion = result.generations[completionIdx]
+      if (this.isPromptCompletionSampled()) {
+        tags[`langchain.response.completions.${completionIdx}.text`] = this.normalize(completion[0].text) || ''
+      }
+
+      if (completion && completion[0].generationInfo) {
+        const generationInfo = completion[0].generationInfo
+        tags[`langchain.response.completions.${completionIdx}.finish_reason`] = generationInfo.finishReason
+        tags[`langchain.response.completions.${completionIdx}.logprobs`] = generationInfo.logprobs
+      }
+    }
+
+    return tags
+  }
+}
+
+module.exports = LangChainLLMHandler
diff --git a/packages/datadog-plugin-langchain/src/index.js b/packages/datadog-plugin-langchain/src/index.js
new file mode 100644
index 00000000000..19b6e7d9793
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/index.js
@@ -0,0 +1,89 @@
+'use strict'
+
+const { MEASURED } = require('../../../ext/tags')
+const { storage } = require('../../datadog-core')
+const TracingPlugin = require('../../dd-trace/src/plugins/tracing')
+
+const API_KEY = 'langchain.request.api_key'
+const MODEL = 'langchain.request.model'
+const PROVIDER = 'langchain.request.provider'
+const TYPE = 'langchain.request.type'
+
+const LangChainHandler = require('./handlers/default')
+const LangChainChatModelHandler = require('./handlers/language_models/chat_model')
+const LangChainLLMHandler = require('./handlers/language_models/llm')
+const LangChainChainHandler = require('./handlers/chain')
+const LangChainEmbeddingHandler = require('./handlers/embedding')
+
+class LangChainPlugin extends TracingPlugin {
+  static get id () { return 'langchain' }
+  static get operation () { return 'invoke' }
+  static get system () { return 'langchain' }
+  static get prefix () {
+    return 'tracing:apm:langchain:invoke'
+  }
+
+  constructor () {
+    super(...arguments)
+
+    const langchainConfig = this._tracerConfig.langchain || {}
+    this.handlers = {
+      chain: new LangChainChainHandler(langchainConfig),
+      chat_model: new LangChainChatModelHandler(langchainConfig),
+      llm: new LangChainLLMHandler(langchainConfig),
+      embedding: new LangChainEmbeddingHandler(langchainConfig),
+      default: new LangChainHandler(langchainConfig)
+    }
+  }
+
+  bindStart (ctx) {
+    const { resource, type } = ctx
+    const handler = this.handlers[type]
+
+    const instance = ctx.instance
+    const apiKey = handler.extractApiKey(instance)
+    const provider = handler.extractProvider(instance)
+    const model = handler.extractModel(instance)
+
+    const tags = handler.getSpanStartTags(ctx, provider) || []
+
+    if (apiKey) tags[API_KEY] = apiKey
+    if (provider) tags[PROVIDER] = provider
+    if (model) tags[MODEL] = model
+    if (type) tags[TYPE] = type
+
+    const span = this.startSpan('langchain.request', {
+      service: this.config.service,
+      resource,
+      kind: 'client',
+      meta: {
+        [MEASURED]: 1,
+        ...tags
+      }
+    }, false)
+
+    const store = storage.getStore() || {}
+    ctx.currentStore = { ...store, span }
+
+    return ctx.currentStore
+  }
+
+  asyncEnd (ctx) {
+    const span = ctx.currentStore.span
+
+    const { type } = ctx
+
+    const handler = this.handlers[type]
+    const tags = handler.getSpanEndTags(ctx) || {}
+
+    span.addTags(tags)
+
+    span.finish()
+  }
+
+  getHandler (type) {
+    return this.handlers[type] || this.handlers.default
+  }
+}
+
+module.exports = LangChainPlugin
diff --git a/packages/datadog-plugin-langchain/src/tokens.js b/packages/datadog-plugin-langchain/src/tokens.js
new file mode 100644
index 00000000000..e29bb80735c
--- /dev/null
+++ b/packages/datadog-plugin-langchain/src/tokens.js
@@ -0,0 +1,35 @@
+'use strict'
+
+function getTokensFromLlmOutput (result) {
+  const tokens = {
+    input: 0,
+    output: 0,
+    total: 0
+  }
+  const { llmOutput } = result
+  if (!llmOutput) return tokens
+
+  const tokenUsage = llmOutput.tokenUsage || llmOutput.usage_metadata
+  if (!tokenUsage) return tokens
+
+  for (const tokenNames of [['input', 'prompt'], ['output', 'completion'], ['total']]) {
+    let token = 0
+    for (const tokenName of tokenNames) {
+      const underScore = `${tokenName}_tokens`
+      const camelCase = `${tokenName}Tokens`
+
+      token = tokenUsage[underScore] || tokenUsage[camelCase] || token
+    }
+
+    tokens[tokenNames[0]] = token
+  }
+
+  // assign total_tokens again in case it was improperly set the first time, or was not on tokenUsage
+  tokens.total = tokens.total || tokens.input + tokens.output
+
+  return tokens
+}
+
+module.exports = {
+  getTokensFromLlmOutput
+}
diff --git a/packages/datadog-plugin-langchain/test/index.spec.js b/packages/datadog-plugin-langchain/test/index.spec.js
new file mode 100644
index 00000000000..77f61da3688
--- /dev/null
+++ b/packages/datadog-plugin-langchain/test/index.spec.js
@@ -0,0 +1,878 @@
+'use strict'
+
+const { useEnv } = require('../../../integration-tests/helpers')
+const agent = require('../../dd-trace/test/plugins/agent')
+
+const nock = require('nock')
+
+function stubCall ({ base = '', path = '', code = 200, response = {} }) {
+  const responses = Array.isArray(response) ? response : [response]
+  const times = responses.length
+  nock(base).post(path).times(times).reply(() => {
+    return [code, responses.shift()]
+  })
+}
+const openAiBaseCompletionInfo = { base: 'https://api.openai.com', path: '/v1/completions' }
+const openAiBaseChatInfo = { base: 'https://api.openai.com', path: '/v1/chat/completions' }
+const openAiBaseEmbeddingInfo = { base: 'https://api.openai.com', path: '/v1/embeddings' }
+
+describe('Plugin', () => {
+  let langchainOpenai
+  let langchainAnthropic
+
+  let langchainMessages
+  let langchainOutputParsers
+  let langchainPrompts
+  let langchainRunnables
+
+  // so we can verify the api key gets tagged properly
+  useEnv({
+    OPENAI_API_KEY: '<not-a-real-key>',
+    ANTHROPIC_API_KEY: '<not-a-real-key>'
+  })
+
+  describe('langchain', () => {
+    withVersions('langchain', ['@langchain/core'], version => {
+      beforeEach(() => {
+        return agent.load('langchain')
+      })
+
+      afterEach(() => {
+        // wiping in order to read new env vars for the config each time
+        return agent.close({ ritmReset: false, wipe: true })
+      })
+
+      beforeEach(() => {
+        langchainOpenai = require(`../../../versions/@langchain/openai@${version}`).get()
+        langchainAnthropic = require(`../../../versions/@langchain/anthropic@${version}`).get()
+
+        // need to specify specific import in `get(...)`
+        langchainMessages = require(`../../../versions/@langchain/core@${version}`).get('@langchain/core/messages')
+        langchainOutputParsers = require(`../../../versions/@langchain/core@${version}`)
+          .get('@langchain/core/output_parsers')
+        langchainPrompts = require(`../../../versions/@langchain/core@${version}`).get('@langchain/core/prompts')
+        langchainRunnables = require(`../../../versions/@langchain/core@${version}`).get('@langchain/core/runnables')
+      })
+
+      afterEach(() => {
+        nock.cleanAll()
+      })
+
+      describe('with global configurations', () => {
+        describe('with sampling rate', () => {
+          useEnv({
+            DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE: 0
+          })
+
+          it('does not tag prompt or completion', async () => {
+            stubCall({
+              ...openAiBaseCompletionInfo,
+              response: {
+                model: 'gpt-3.5-turbo-instruct',
+                choices: [{
+                  text: 'The answer is 4',
+                  index: 0,
+                  logprobs: null,
+                  finish_reason: 'length'
+                }],
+                usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
+              }
+            })
+
+            const llm = new langchainOpenai.OpenAI({ model: 'gpt-3.5-turbo-instruct' })
+            const checkTraces = agent
+              .use(traces => {
+                expect(traces[0].length).to.equal(1)
+                const span = traces[0][0]
+
+                expect(span.meta).to.not.have.property('langchain.request.prompts.0.content')
+                expect(span.meta).to.not.have.property('langchain.response.completions.0.text')
+              })
+
+            const result = await llm.generate(['what is 2 + 2?'])
+
+            expect(result.generations[0][0].text).to.equal('The answer is 4')
+
+            await checkTraces
+          })
+        })
+
+        describe('with span char limit', () => {
+          useEnv({
+            DD_LANGCHAIN_SPAN_CHAR_LIMIT: 5
+          })
+
+          it('truncates the prompt and completion', async () => {
+            stubCall({
+              ...openAiBaseCompletionInfo,
+              response: {
+                model: 'gpt-3.5-turbo-instruct',
+                choices: [{
+                  text: 'The answer is 4',
+                  index: 0,
+                  logprobs: null,
+                  finish_reason: 'length'
+                }],
+                usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
+              }
+            })
+
+            const llm = new langchainOpenai.OpenAI({ model: 'gpt-3.5-turbo-instruct' })
+            const checkTraces = agent
+              .use(traces => {
+                expect(traces[0].length).to.equal(1)
+                const span = traces[0][0]
+
+                expect(span.meta).to.have.property('langchain.request.prompts.0.content', 'what ...')
+                expect(span.meta).to.have.property('langchain.response.completions.0.text', 'The a...')
+              })
+
+            const result = await llm.generate(['what is 2 + 2?'])
+
+            expect(result.generations[0][0].text).to.equal('The answer is 4')
+
+            await checkTraces
+          })
+        })
+      })
+
+      describe('llm', () => {
+        it('instruments a langchain llm call for a single prompt', async () => {
+          stubCall({
+            ...openAiBaseCompletionInfo,
+            response: {
+              model: 'gpt-3.5-turbo-instruct',
+              choices: [{
+                text: 'The answer is 4',
+                index: 0,
+                logprobs: null,
+                finish_reason: 'length'
+              }],
+              usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
+            }
+          })
+
+          const llm = new langchainOpenai.OpenAI({ model: 'gpt-3.5-turbo-instruct' })
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span).to.have.property('name', 'langchain.request')
+              expect(span).to.have.property('resource', 'langchain.llms.openai.OpenAI')
+
+              expect(span.meta).to.have.property('langchain.request.api_key', '...key>')
+              expect(span.meta).to.have.property('langchain.request.provider', 'openai')
+              expect(span.meta).to.have.property('langchain.request.model', 'gpt-3.5-turbo-instruct')
+              expect(span.meta).to.have.property('langchain.request.type', 'llm')
+              expect(span.meta).to.have.property('langchain.request.prompts.0.content', 'what is 2 + 2?')
+
+              expect(span.meta).to.have.property('langchain.response.completions.0.text', 'The answer is 4')
+              expect(span.meta).to.have.property('langchain.response.completions.0.finish_reason', 'length')
+
+              expect(span.metrics).to.have.property('langchain.tokens.input_tokens', 8)
+              expect(span.metrics).to.have.property('langchain.tokens.output_tokens', 12)
+              expect(span.metrics).to.have.property('langchain.tokens.total_tokens', 20)
+            })
+
+          const result = await llm.generate(['what is 2 + 2?'])
+
+          expect(result.generations[0][0].text).to.equal('The answer is 4')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain openai llm call for multiple prompts', async () => {
+          stubCall({
+            ...openAiBaseCompletionInfo,
+            response: {
+              model: 'gpt-3.5-turbo-instruct',
+              choices: [{
+                text: 'The answer is 4',
+                index: 0,
+                logprobs: null,
+                finish_reason: 'length'
+              }, {
+                text: 'The circumference of the earth is 24,901 miles',
+                index: 1,
+                logprobs: null,
+                finish_reason: 'length'
+              }],
+              usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span.meta).to.have.property('langchain.request.prompts.0.content', 'what is 2 + 2?')
+              expect(span.meta).to.have.property(
+                'langchain.request.prompts.1.content', 'what is the circumference of the earth?')
+
+              expect(span.meta).to.have.property('langchain.response.completions.0.text', 'The answer is 4')
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.1.text', 'The circumference of the earth is 24,901 miles')
+            })
+
+          const llm = new langchainOpenai.OpenAI({ model: 'gpt-3.5-turbo-instruct' })
+          const result = await llm.generate(['what is 2 + 2?', 'what is the circumference of the earth?'])
+
+          expect(result.generations[0][0].text).to.equal('The answer is 4')
+          expect(result.generations[1][0].text).to.equal('The circumference of the earth is 24,901 miles')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain openai llm call for a single prompt and multiple responses', async () => {
+          // it should only use the first choice
+          stubCall({
+            ...openAiBaseCompletionInfo,
+            response: {
+              model: 'gpt-3.5-turbo-instruct',
+              choices: [{
+                text: 'The answer is 4',
+                index: 0,
+                logprobs: null,
+                finish_reason: 'length'
+              }, {
+                text: '2 + 2 = 4',
+                index: 1,
+                logprobs: null,
+                finish_reason: 'length'
+              }],
+              usage: { prompt_tokens: 8, completion_tokens: 12, total_tokens: 20 }
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span.metrics).to.have.property('langchain.request.openai.parameters.n', 2)
+
+              expect(span.meta).to.have.property('langchain.request.prompts.0.content', 'what is 2 + 2?')
+              expect(span.meta).to.have.property('langchain.response.completions.0.text', 'The answer is 4')
+
+              expect(span.meta).to.not.have.property('langchain.response.completions.1.text')
+            })
+
+          const llm = new langchainOpenai.OpenAI({ model: 'gpt-3.5-turbo-instruct', n: 2 })
+          const result = await llm.generate(['what is 2 + 2?'])
+
+          expect(result.generations[0][0].text).to.equal('The answer is 4')
+          expect(result.generations[0][1].text).to.equal('2 + 2 = 4')
+
+          await checkTraces
+        })
+      })
+
+      describe('chat model', () => {
+        it('instruments a langchain openai chat model call for a single string prompt', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              usage: {
+                prompt_tokens: 37,
+                completion_tokens: 10,
+                total_tokens: 47
+              },
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: 'Hello! How can I assist you today?'
+                },
+                finish_reason: 'length',
+                index: 0
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span).to.have.property('name', 'langchain.request')
+              expect(span).to.have.property('resource', 'langchain.chat_models.openai.ChatOpenAI')
+
+              expect(span.meta).to.have.property('langchain.request.api_key', '...key>')
+              expect(span.meta).to.have.property('langchain.request.provider', 'openai')
+              expect(span.meta).to.have.property('langchain.request.model', 'gpt-4')
+              expect(span.meta).to.have.property('langchain.request.type', 'chat_model')
+
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.content', 'Hello!')
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.message_type', 'HumanMessage')
+
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.0.0.content', 'Hello! How can I assist you today?'
+              )
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.message_type', 'AIMessage')
+
+              expect(span.metrics).to.have.property('langchain.tokens.input_tokens', 37)
+              expect(span.metrics).to.have.property('langchain.tokens.output_tokens', 10)
+              expect(span.metrics).to.have.property('langchain.tokens.total_tokens', 47)
+            })
+
+          const chatModel = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+          const result = await chatModel.invoke('Hello!')
+
+          expect(result.content).to.equal('Hello! How can I assist you today?')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain openai chat model call for a JSON message input', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              usage: {
+                prompt_tokens: 37,
+                completion_tokens: 10,
+                total_tokens: 47
+              },
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: 'Hi!'
+                },
+                finish_reason: 'length',
+                index: 0
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span.meta).to.have.property(
+                'langchain.request.messages.0.0.content', 'You only respond with one word answers'
+              )
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.message_type', 'SystemMessage')
+              expect(span.meta).to.have.property('langchain.request.messages.0.1.content', 'Hello!')
+              expect(span.meta).to.have.property('langchain.request.messages.0.1.message_type', 'HumanMessage')
+
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.content', 'Hi!')
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.message_type', 'AIMessage')
+            })
+
+          const chatModel = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+          const messages = [
+            { role: 'system', content: 'You only respond with one word answers' },
+            { role: 'human', content: 'Hello!' }
+          ]
+
+          const result = await chatModel.invoke(messages)
+          expect(result.content).to.equal('Hi!')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain openai chat model call for a BaseMessage-like input', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              usage: {
+                prompt_tokens: 37,
+                completion_tokens: 10,
+                total_tokens: 47
+              },
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: 'Hi!'
+                },
+                finish_reason: 'length',
+                index: 0
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span.meta).to.have.property(
+                'langchain.request.messages.0.0.content', 'You only respond with one word answers'
+              )
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.message_type', 'SystemMessage')
+              expect(span.meta).to.have.property('langchain.request.messages.0.1.content', 'Hello!')
+              expect(span.meta).to.have.property('langchain.request.messages.0.1.message_type', 'HumanMessage')
+
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.0.0.content', 'Hi!'
+              )
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.message_type', 'AIMessage')
+            })
+
+          const chatModel = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+          const messages = [
+            new langchainMessages.SystemMessage('You only respond with one word answers'),
+            new langchainMessages.HumanMessage('Hello!')
+          ]
+          const result = await chatModel.invoke(messages)
+
+          expect(result.content).to.equal('Hi!')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain openai chat model call with tool calls', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: null,
+                  tool_calls: [
+                    {
+                      id: 'tool-1',
+                      type: 'function',
+                      function: {
+                        name: 'extract_fictional_info',
+                        arguments: '{"name":"SpongeBob","origin":"Bikini Bottom"}'
+                      }
+                    }
+                  ]
+                },
+                finish_reason: 'tool_calls',
+                index: 0
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span.meta).to.have.property(
+                'langchain.request.messages.0.0.content', 'My name is SpongeBob and I live in Bikini Bottom.'
+              )
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.message_type', 'HumanMessage')
+              expect(span.meta).to.not.have.property('langchain.response.completions.0.0.content')
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.message_type', 'AIMessage')
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.tool_calls.0.id', 'tool-1')
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.0.0.tool_calls.0.name', 'extract_fictional_info'
+              )
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.0.0.tool_calls.0.args.name', 'SpongeBob'
+              )
+              expect(span.meta).to.have.property(
+                'langchain.response.completions.0.0.tool_calls.0.args.origin', 'Bikini Bottom'
+              )
+            })
+
+          const tools = [
+            {
+              name: 'extract_fictional_info',
+              description: 'Get the fictional information from the body of the input text',
+              parameters: {
+                type: 'object',
+                properties: {
+                  name: { type: 'string', description: 'Name of the character' },
+                  origin: { type: 'string', description: 'Where they live' }
+                }
+              }
+            }
+          ]
+
+          const model = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+          const modelWithTools = model.bindTools(tools)
+
+          const result = await modelWithTools.invoke('My name is SpongeBob and I live in Bikini Bottom.')
+          expect(result.tool_calls).to.have.length(1)
+          expect(result.tool_calls[0].name).to.equal('extract_fictional_info')
+
+          await checkTraces
+        })
+
+        it('instruments a langchain anthropic chat model call', async () => {
+          stubCall({
+            base: 'https://api.anthropic.com',
+            path: '/v1/messages',
+            response: {
+              id: 'msg_01NE2EJQcjscRyLbyercys6p',
+              type: 'message',
+              role: 'assistant',
+              model: 'claude-3-opus-20240229',
+              content: [
+                { type: 'text', text: 'Hello!' }
+              ],
+              stop_reason: 'end_turn',
+              stop_sequence: null,
+              usage: { input_tokens: 11, output_tokens: 6 }
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              expect(traces[0].length).to.equal(1)
+              const span = traces[0][0]
+
+              expect(span).to.have.property('name', 'langchain.request')
+              expect(span).to.have.property('resource', 'langchain.chat_models.anthropic.ChatAnthropic')
+
+              expect(span.meta).to.have.property('langchain.request.api_key', '...key>')
+              expect(span.meta).to.have.property('langchain.request.provider', 'anthropic')
+              expect(span.meta).to.have.property('langchain.request.model')
+              expect(span.meta).to.have.property('langchain.request.type', 'chat_model')
+
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.content', 'Hello!')
+              expect(span.meta).to.have.property('langchain.request.messages.0.0.message_type', 'HumanMessage')
+
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.content', 'Hello!')
+              expect(span.meta).to.have.property('langchain.response.completions.0.0.message_type', 'AIMessage')
+            })
+
+          const chatModel = new langchainAnthropic.ChatAnthropic({ model: 'claude-3-opus-20240229' })
+
+          const result = await chatModel.invoke('Hello!')
+          expect(result.content).to.equal('Hello!')
+
+          await checkTraces
+        })
+      })
+
+      describe('chain', () => {
+        it('instruments a langchain chain with a single openai chat model call', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              usage: {
+                prompt_tokens: 37,
+                completion_tokens: 10,
+                total_tokens: 47
+              },
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: 'Hi!'
+                },
+                finish_reason: 'length',
+                index: 0
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              const spans = traces[0]
+              expect(spans).to.have.length(2)
+
+              const chainSpan = spans[0]
+              // we already check the chat model span in previous tests
+              expect(spans[1]).to.have.property('resource', 'langchain.chat_models.openai.ChatOpenAI')
+
+              expect(chainSpan).to.have.property('name', 'langchain.request')
+              expect(chainSpan).to.have.property('resource', 'langchain_core.runnables.RunnableSequence')
+
+              expect(chainSpan.meta).to.have.property('langchain.request.type', 'chain')
+
+              expect(chainSpan.meta).to.have.property(
+                'langchain.request.inputs.0.content', 'You only respond with one word answers'
+              )
+              expect(chainSpan.meta).to.have.property('langchain.request.inputs.1.content', 'Hello!')
+
+              expect(chainSpan.meta).to.have.property('langchain.response.outputs.0', 'Hi!')
+            })
+
+          const model = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+          const parser = new langchainOutputParsers.StringOutputParser()
+
+          const chain = model.pipe(parser)
+          const messages = [
+            new langchainMessages.SystemMessage('You only respond with one word answers'),
+            new langchainMessages.HumanMessage('Hello!')
+          ]
+          const result = await chain.invoke(messages)
+
+          expect(result).to.equal('Hi!')
+
+          await checkTraces
+        })
+
+        it('instruments a complex langchain chain', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              model: 'gpt-4',
+              usage: {
+                prompt_tokens: 37,
+                completion_tokens: 10,
+                total_tokens: 47
+              },
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: 'Why did the chicken cross the road? To get to the other side!'
+                }
+              }]
+            }
+          })
+
+          const prompt = langchainPrompts.ChatPromptTemplate.fromTemplate(
+            'Tell me a short joke about {topic} in the style of {style}'
+          )
+
+          const model = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+
+          const parser = new langchainOutputParsers.StringOutputParser()
+
+          const chain = langchainRunnables.RunnableSequence.from([
+            {
+              topic: new langchainRunnables.RunnablePassthrough(),
+              style: new langchainRunnables.RunnablePassthrough()
+            },
+            prompt,
+            model,
+            parser
+          ])
+
+          const checkTraces = agent
+            .use(traces => {
+              const spans = traces[0]
+              expect(spans).to.have.length(2)
+
+              const chainSpan = spans[0]
+              // we already check the chat model span in previous tests
+              expect(spans[1]).to.have.property('resource', 'langchain.chat_models.openai.ChatOpenAI')
+
+              expect(chainSpan.meta).to.have.property('langchain.request.type', 'chain')
+              expect(chainSpan.meta).to.have.property('langchain.request.inputs.0.topic', 'chickens')
+              expect(chainSpan.meta).to.have.property('langchain.request.inputs.0.style', 'dad joke')
+              expect(chainSpan.meta).to.have.property(
+                'langchain.response.outputs.0', 'Why did the chicken cross the road? To get to the other side!'
+              )
+            })
+
+          const result = await chain.invoke({ topic: 'chickens', style: 'dad joke' })
+
+          expect(result).to.equal('Why did the chicken cross the road? To get to the other side!')
+
+          await checkTraces
+        })
+
+        it('instruments a batched call', async () => {
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: [
+              {
+                model: 'gpt-4',
+                usage: {
+                  prompt_tokens: 37,
+                  completion_tokens: 10,
+                  total_tokens: 47
+                },
+                choices: [{
+                  message: {
+                    role: 'assistant',
+                    content: 'Why did the chicken cross the road? To get to the other side!'
+                  }
+                }]
+              },
+              {
+                model: 'gpt-4',
+                usage: {
+                  prompt_tokens: 37,
+                  completion_tokens: 10,
+                  total_tokens: 47
+                },
+                choices: [{
+                  message: {
+                    role: 'assistant',
+                    content: 'Why was the dog confused? It was barking up the wrong tree!'
+                  }
+                }]
+              }
+            ]
+          })
+
+          const prompt = langchainPrompts.ChatPromptTemplate.fromTemplate(
+            'Tell me a joke about {topic}'
+          )
+          const parser = new langchainOutputParsers.StringOutputParser()
+          const model = new langchainOpenai.ChatOpenAI({ model: 'gpt-4' })
+
+          const chain = langchainRunnables.RunnableSequence.from([
+            {
+              topic: new langchainRunnables.RunnablePassthrough()
+            },
+            prompt,
+            model,
+            parser
+          ])
+
+          const checkTraces = agent
+            .use(traces => {
+              const spans = traces[0]
+              expect(spans).to.have.length(3) // 1 chain + 2 chat model
+
+              const chainSpan = spans[0]
+
+              expect(chainSpan.meta).to.have.property('langchain.request.type', 'chain')
+              expect(chainSpan.meta).to.have.property('langchain.request.inputs.0', 'chickens')
+              expect(chainSpan.meta).to.have.property('langchain.request.inputs.1', 'dogs')
+              expect(chainSpan.meta).to.have.property(
+                'langchain.response.outputs.0', 'Why did the chicken cross the road? To get to the other side!'
+              )
+              expect(chainSpan.meta).to.have.property(
+                'langchain.response.outputs.1', 'Why was the dog confused? It was barking up the wrong tree!'
+              )
+            })
+
+          const result = await chain.batch(['chickens', 'dogs'])
+
+          expect(result).to.have.length(2)
+          expect(result[0]).to.equal('Why did the chicken cross the road? To get to the other side!')
+          expect(result[1]).to.equal('Why was the dog confused? It was barking up the wrong tree!')
+
+          await checkTraces
+        })
+
+        it('instruments a chain with a JSON output parser and tags it correctly', async function () {
+          if (!langchainOutputParsers.JsonOutputParser) this.skip()
+
+          stubCall({
+            ...openAiBaseChatInfo,
+            response: {
+              choices: [{
+                message: {
+                  role: 'assistant',
+                  content: '{\n  "name": "John",\n  "age": 30\n}',
+                  refusal: null
+                }
+              }]
+            }
+          })
+
+          const checkTraces = agent
+            .use(traces => {
+              const spans = traces[0]
+              expect(spans).to.have.length(2) // 1 chain + 1 chat model
+
+              const chainSpan = spans[0]
+
+              expect(chainSpan.meta).to.have.property('langchain.request.type', 'chain')
+              expect(chainSpan.meta).to.have.property(
+                'langchain.request.inputs.0', 'Generate a JSON object with name and age.'
+              )
+
+              expect(chainSpan.meta).to.have.property('langchain.response.outputs.0', '{"name":"John","age":30}')
+            })
+
+          const parser = new langchainOutputParsers.JsonOutputParser()
+          const model = new langchainOpenai.ChatOpenAI({ model: 'gpt-3.5-turbo' })
+
+          const chain = model.pipe(parser)
+
+          const response = await chain.invoke('Generate a JSON object with name and age.')
+          expect(response).to.deep.equal({
+            name: 'John',
+            age: 30
+          })
+
+          await checkTraces
+        })
+      })
+
+      describe('embeddings', () => {
+        describe('@langchain/openai', () => {
+          it('instruments a langchain openai embedQuery call', async () => {
+            stubCall({
+              ...openAiBaseEmbeddingInfo,
+              response: {
+                object: 'list',
+                data: [{
+                  object: 'embedding',
+                  index: 0,
+                  embedding: [-0.0034387498, -0.026400521]
+                }]
+              }
+            })
+            const embeddings = new langchainOpenai.OpenAIEmbeddings()
+
+            const checkTraces = agent
+              .use(traces => {
+                expect(traces[0].length).to.equal(1)
+                const span = traces[0][0]
+
+                expect(span).to.have.property('name', 'langchain.request')
+                expect(span).to.have.property('resource', 'langchain.embeddings.openai.OpenAIEmbeddings')
+
+                expect(span.meta).to.have.property('langchain.request.api_key', '...key>')
+                expect(span.meta).to.have.property('langchain.request.provider', 'openai')
+                expect(span.meta).to.have.property('langchain.request.model', 'text-embedding-ada-002')
+                expect(span.meta).to.have.property('langchain.request.type', 'embedding')
+
+                expect(span.meta).to.have.property('langchain.request.inputs.0.text', 'Hello, world!')
+                expect(span.metrics).to.have.property('langchain.request.input_counts', 1)
+                expect(span.metrics).to.have.property('langchain.response.outputs.embedding_length', 2)
+              })
+
+            const query = 'Hello, world!'
+            const result = await embeddings.embedQuery(query)
+
+            expect(result).to.have.length(2)
+            expect(result).to.deep.equal([-0.0034387498, -0.026400521])
+
+            await checkTraces
+          })
+
+          it('instruments a langchain openai embedDocuments call', async () => {
+            stubCall({
+              ...openAiBaseEmbeddingInfo,
+              response: {
+                object: 'list',
+                data: [{
+                  object: 'embedding',
+                  index: 0,
+                  embedding: [-0.0034387498, -0.026400521]
+                }, {
+                  object: 'embedding',
+                  index: 1,
+                  embedding: [-0.026400521, -0.0034387498]
+                }]
+              }
+            })
+
+            const checkTraces = agent
+              .use(traces => {
+                expect(traces[0].length).to.equal(1)
+                const span = traces[0][0]
+
+                expect(span.meta).to.have.property('langchain.request.inputs.0.text', 'Hello, world!')
+                expect(span.meta).to.have.property('langchain.request.inputs.1.text', 'Goodbye, world!')
+                expect(span.metrics).to.have.property('langchain.request.input_counts', 2)
+
+                expect(span.metrics).to.have.property('langchain.response.outputs.embedding_length', 2)
+              })
+
+            const embeddings = new langchainOpenai.OpenAIEmbeddings()
+
+            const documents = ['Hello, world!', 'Goodbye, world!']
+            const result = await embeddings.embedDocuments(documents)
+
+            expect(result).to.have.length(2)
+            expect(result[0]).to.deep.equal([-0.0034387498, -0.026400521])
+            expect(result[1]).to.deep.equal([-0.026400521, -0.0034387498])
+
+            await checkTraces
+          })
+        })
+      })
+    })
+  })
+})
diff --git a/packages/datadog-plugin-langchain/test/integration-test/client.spec.js b/packages/datadog-plugin-langchain/test/integration-test/client.spec.js
new file mode 100644
index 00000000000..bc505687115
--- /dev/null
+++ b/packages/datadog-plugin-langchain/test/integration-test/client.spec.js
@@ -0,0 +1,55 @@
+'use strict'
+
+const {
+  FakeAgent,
+  createSandbox,
+  checkSpansForServiceName,
+  spawnPluginIntegrationTestProc
+} = require('../../../../integration-tests/helpers')
+const { assert } = require('chai')
+
+// there is currently an issue with langchain + esm loader hooks from IITM
+// https://github.com/nodejs/import-in-the-middle/issues/163
+describe.skip('esm', () => {
+  let agent
+  let proc
+  let sandbox
+
+  withVersions('langchain', ['@langchain/core'], '>=0.1', version => {
+    before(async function () {
+      this.timeout(20000)
+      sandbox = await createSandbox([
+        `@langchain/core@${version}`,
+        `@langchain/openai@${version}`,
+        'nock'
+      ], false, [
+        './packages/datadog-plugin-langchain/test/integration-test/*'
+      ])
+    })
+
+    after(async () => {
+      await sandbox.remove()
+    })
+
+    beforeEach(async () => {
+      agent = await new FakeAgent().start()
+    })
+
+    afterEach(async () => {
+      proc?.kill()
+      await agent.stop()
+    })
+
+    it('is instrumented', async () => {
+      const res = agent.assertMessageReceived(({ headers, payload }) => {
+        assert.propertyVal(headers, 'host', `127.0.0.1:${agent.port}`)
+        assert.isArray(payload)
+        assert.strictEqual(checkSpansForServiceName(payload, 'langchain.request'), true)
+      })
+
+      proc = await spawnPluginIntegrationTestProc(sandbox.folder, 'server.mjs', agent.port)
+
+      await res
+    }).timeout(20000)
+  })
+})
diff --git a/packages/datadog-plugin-langchain/test/integration-test/server.mjs b/packages/datadog-plugin-langchain/test/integration-test/server.mjs
new file mode 100644
index 00000000000..b929824b7dd
--- /dev/null
+++ b/packages/datadog-plugin-langchain/test/integration-test/server.mjs
@@ -0,0 +1,18 @@
+import 'dd-trace/init.js'
+import { OpenAI } from '@langchain/openai'
+import { StringOutputParser } from '@langchain/core/output_parsers'
+import nock from 'nock'
+
+nock('https://api.openai.com:443')
+  .post('/v1/completions')
+  .reply(200, {})
+
+const llm = new OpenAI({
+  apiKey: '<not-a-real-key>'
+})
+
+const parser = new StringOutputParser()
+
+const chain = llm.pipe(parser)
+
+await chain.invoke('a test')
diff --git a/packages/dd-trace/src/config.js b/packages/dd-trace/src/config.js
index 05de1cdf600..73cac449546 100644
--- a/packages/dd-trace/src/config.js
+++ b/packages/dd-trace/src/config.js
@@ -505,6 +505,8 @@ class Config {
     this._setValue(defaults, 'isGitUploadEnabled', false)
     this._setValue(defaults, 'isIntelligentTestRunnerEnabled', false)
     this._setValue(defaults, 'isManualApiEnabled', false)
+    this._setValue(defaults, 'langchain.spanCharLimit', 128)
+    this._setValue(defaults, 'langchain.spanPromptCompletionSampleRate', 1.0)
     this._setValue(defaults, 'llmobs.agentlessEnabled', false)
     this._setValue(defaults, 'llmobs.enabled', false)
     this._setValue(defaults, 'llmobs.mlApp', undefined)
@@ -615,6 +617,8 @@
       DD_INSTRUMENTATION_TELEMETRY_ENABLED,
       DD_INSTRUMENTATION_CONFIG_ID,
       DD_LOGS_INJECTION,
+      DD_LANGCHAIN_SPAN_CHAR_LIMIT,
+      DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE,
       DD_LLMOBS_AGENTLESS_ENABLED,
       DD_LLMOBS_ENABLED,
       DD_LLMOBS_ML_APP,
@@ -771,6 +775,10 @@
     this._setArray(env, 'injectionEnabled', DD_INJECTION_ENABLED)
     this._setBoolean(env, 'isAzureFunction', getIsAzureFunction())
     this._setBoolean(env, 'isGCPFunction', getIsGCPFunction())
+    this._setValue(env, 'langchain.spanCharLimit', maybeInt(DD_LANGCHAIN_SPAN_CHAR_LIMIT))
+    this._setValue(
+      env, 'langchain.spanPromptCompletionSampleRate', maybeFloat(DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE)
+    )
     this._setBoolean(env, 'legacyBaggageEnabled', DD_TRACE_LEGACY_BAGGAGE_ENABLED)
     this._setBoolean(env, 'llmobs.agentlessEnabled', DD_LLMOBS_AGENTLESS_ENABLED)
     this._setBoolean(env, 'llmobs.enabled', DD_LLMOBS_ENABLED)
diff --git a/packages/dd-trace/src/plugins/index.js b/packages/dd-trace/src/plugins/index.js
index 80c32401536..3e77226a119 100644
--- a/packages/dd-trace/src/plugins/index.js
+++ b/packages/dd-trace/src/plugins/index.js
@@ -15,6 +15,8 @@ module.exports = {
   get '@jest/test-sequencer' () { return require('../../../datadog-plugin-jest/src') },
   get '@jest/transform' () { return require('../../../datadog-plugin-jest/src') },
   get '@koa/router' () { return require('../../../datadog-plugin-koa/src') },
+  get '@langchain/core' () { return require('../../../datadog-plugin-langchain/src') },
+  get '@langchain/openai' () { return require('../../../datadog-plugin-langchain/src') },
   get '@node-redis/client' () { return require('../../../datadog-plugin-redis/src') },
   get '@opensearch-project/opensearch' () { return require('../../../datadog-plugin-opensearch/src') },
   get '@redis/client' () { return require('../../../datadog-plugin-redis/src') },
@@ -52,6 +54,7 @@ module.exports = {
   get koa () { return require('../../../datadog-plugin-koa/src') },
   get 'koa-router' () { return require('../../../datadog-plugin-koa/src') },
   get kafkajs () { return require('../../../datadog-plugin-kafkajs/src') },
+  get langchain () { return require('../../../datadog-plugin-langchain/src') },
   get mariadb () { return require('../../../datadog-plugin-mariadb/src') },
   get memcached () { return require('../../../datadog-plugin-memcached/src') },
   get 'microgateway-core' () { return require('../../../datadog-plugin-microgateway-core/src') },
diff --git a/packages/dd-trace/test/config.spec.js b/packages/dd-trace/test/config.spec.js
index f840dcd4a13..1720c4a5c91 100644
--- a/packages/dd-trace/test/config.spec.js
+++ b/packages/dd-trace/test/config.spec.js
@@ -334,6 +334,8 @@ describe('Config', () => {
       { name: 'isGitUploadEnabled', value: false, origin: 'default' },
       { name: 'isIntelligentTestRunnerEnabled', value: false, origin: 'default' },
       { name: 'isManualApiEnabled', value: false, origin: 'default' },
+      { name: 'langchain.spanCharLimit', value: 128, origin: 'default' },
+      { name: 'langchain.spanPromptCompletionSampleRate', value: 1.0, origin: 'default' },
       { name: 'llmobs.agentlessEnabled', value: false, origin: 'default' },
       { name: 'llmobs.mlApp', value: undefined, origin: 'default' },
       { name: 'ciVisibilityTestSessionName', value: '', origin: 'default' },
@@ -509,6 +511,8 @@ describe('Config', () => {
       process.env.DD_INSTRUMENTATION_INSTALL_TYPE = 'k8s_single_step'
      process.env.DD_INSTRUMENTATION_INSTALL_TIME = '1703188212'
       process.env.DD_INSTRUMENTATION_CONFIG_ID = 'abcdef123'
+      process.env.DD_LANGCHAIN_SPAN_CHAR_LIMIT = 50
+      process.env.DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE = 0.5
       process.env.DD_LLMOBS_AGENTLESS_ENABLED = 'true'
       process.env.DD_LLMOBS_ML_APP = 'myMlApp'
       process.env.DD_TRACE_ENABLED = 'true'
@@ -684,7 +688,9 @@ describe('Config', () => {
         { name: 'tracing', value: false, origin: 'env_var' },
         { name: 'version', value: '1.0.0', origin: 'env_var' },
         { name: 'llmobs.mlApp', value: 'myMlApp', origin: 'env_var' },
-        { name: 'llmobs.agentlessEnabled', value: true, origin: 'env_var' }
+        { name: 'llmobs.agentlessEnabled', value: true, origin: 'env_var' },
+        { name: 'langchain.spanCharLimit', value: 50, origin: 'env_var' },
+        { name: 'langchain.spanPromptCompletionSampleRate', value: 0.5, origin: 'env_var' }
       ])
     })
diff --git a/packages/dd-trace/test/plugins/externals.json b/packages/dd-trace/test/plugins/externals.json
index 5b00aa6061c..600df395d84 100644
--- a/packages/dd-trace/test/plugins/externals.json
+++ b/packages/dd-trace/test/plugins/externals.json
@@ -271,6 +271,12 @@
       "versions": ["6.1.0"]
     }
   ],
+  "langchain": [
+    {
+      "name": "@langchain/anthropic",
+      "versions": [">=0.1"]
+    }
+  ],
   "ldapjs": [
     {
       "name": "ldapjs",
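
For reference, a minimal usage sketch of what this patch enables, not part of the patch itself. It is grounded in the diff above (the `langchain` plugin id, the `tracer.use('langchain')` entry added to docs/test.ts, and the two env vars wired up in config.js); the app file name, model, and prompt are illustrative, and passing options beyond the generic `enabled` flag is an assumption.

// app.js -- run with the new env vars to exercise the added config, e.g.
// DD_LANGCHAIN_SPAN_CHAR_LIMIT=128 DD_LANGCHAIN_SPAN_PROMPT_COMPLETION_SAMPLE_RATE=1.0 node app.js
const tracer = require('dd-trace').init()
tracer.use('langchain', { enabled: true })

const { ChatOpenAI } = require('@langchain/openai')

async function main () {
  const model = new ChatOpenAI({ model: 'gpt-4' })
  // per the tests above, this call is traced as a `langchain.request` span with
  // resource `langchain.chat_models.openai.ChatOpenAI` and request type `chat_model`
  const result = await model.invoke('Hello!')
  console.log(result.content)
}

main()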