From dc654f20a044f0d412552d0cce9033d51f895035 Mon Sep 17 00:00:00 2001 From: Peng Lyu Date: Wed, 20 Mar 2024 12:48:07 -0700 Subject: [PATCH] Support summarization for jupyter variables (#15404) * Support summarization for jupyter variables * Handle exceptions --- .../getVariableInfo/vscodeGetVariableInfo.py | 14 ++++ .../variables/JupyterVariablesProvider.ts | 70 ++++++++++++++++++- src/kernels/variables/jupyterVariables.ts | 8 +++ src/kernels/variables/kernelVariables.ts | 19 +++++ .../variables/pythonVariableRequester.ts | 25 +++++++ src/kernels/variables/types.ts | 16 +++++ src/kernels/variables/variableResultCache.ts | 23 +++--- src/notebooks/debugger/debuggerVariables.ts | 4 ++ src/platform/common/types.ts | 1 + .../interpreter/variableScriptGenerator.ts | 8 +++ src/standalone/chat/extesnion.ts | 6 +- 11 files changed, 173 insertions(+), 21 deletions(-) diff --git a/pythonFiles/vscode_datascience_helpers/getVariableInfo/vscodeGetVariableInfo.py b/pythonFiles/vscode_datascience_helpers/getVariableInfo/vscodeGetVariableInfo.py index 80295b48141..acf9e7b8124 100644 --- a/pythonFiles/vscode_datascience_helpers/getVariableInfo/vscodeGetVariableInfo.py +++ b/pythonFiles/vscode_datascience_helpers/getVariableInfo/vscodeGetVariableInfo.py @@ -277,6 +277,18 @@ def _VSCODE_getVariableTypes(varnames): else: return _VSCODE_builtins.print(_VSCODE_json.dumps(result)) + def _VSCODE_getVariableSummary(variable): + if variable is None: + return None + # check if the variable is a dataframe + if ( + _VSCODE_builtins.type(variable).__name__ == "DataFrame" + and _VSCODE_importlib_util.find_spec("pandas") is not None + ): + return _VSCODE_builtins.print(variable.info()) + + return None + try: if what_to_get == "properties": return _VSCODE_getVariableProperties(*args) @@ -286,6 +298,8 @@ def _VSCODE_getVariableTypes(varnames): return _VSCODE_getVariableDescriptions(*args) elif what_to_get == "AllChildrenDescriptions": return _VSCODE_getAllChildrenDescriptions(*args) + elif 
what_to_get == "summary": + return _VSCODE_getVariableSummary(*args) else: return _VSCODE_getVariableTypes(*args) finally: diff --git a/src/kernels/variables/JupyterVariablesProvider.ts b/src/kernels/variables/JupyterVariablesProvider.ts index f02f55b0e1a..206652e9bb2 100644 --- a/src/kernels/variables/JupyterVariablesProvider.ts +++ b/src/kernels/variables/JupyterVariablesProvider.ts @@ -10,13 +10,14 @@ import { VariablesResult, EventEmitter } from 'vscode'; -import { IJupyterVariables, IVariableDescription } from './types'; +import { IJupyterVariables, IRichVariableResult, IVariableDescription } from './types'; import { IKernel, IKernelProvider } from '../types'; -import { VariableResultCache } from './variableResultCache'; +import { VariableResultCache, VariableSummaryCache } from './variableResultCache'; import { IDisposable } from '../../platform/common/types'; export class JupyterVariablesProvider implements NotebookVariableProvider { private variableResultCache = new VariableResultCache(); + private variableSummaryCache = new VariableSummaryCache(); private runningKernels = new Set(); _onDidChangeVariables = new EventEmitter(); @@ -45,6 +46,15 @@ export class JupyterVariablesProvider implements NotebookVariableProvider { } } + private _getVariableResultCacheKey(notebookUri: string, parent: Variable | undefined, start: number) { + let parentKey = ''; + const parentDescription = parent as IVariableDescription; + if (parentDescription) { + parentKey = `${parentDescription.name}.${parentDescription.propertyChain.join('.')}[[${start}`; + } + return `${notebookUri}:${parentKey}`; + } + async *provideVariables( notebook: NotebookDocument, parent: Variable | undefined, @@ -62,7 +72,7 @@ export class JupyterVariablesProvider implements NotebookVariableProvider { const executionCount = this.kernelProvider.getKernelExecution(kernel).executionCount; - const cacheKey = this.variableResultCache.getCacheKey(notebook.uri.toString(), parent, start); + const cacheKey = 
this._getVariableResultCacheKey(notebook.uri.toString(), parent, start); let results = this.variableResultCache.getResults(executionCount, cacheKey); if (parent) { @@ -110,6 +120,60 @@ export class JupyterVariablesProvider implements NotebookVariableProvider { } } + private _getVariableSummaryCacheKey(notebookUri: string, variable: Variable) { + return `${notebookUri}:${variable.name}`; + } + + async *provideVariablesWithSummarization( + notebook: NotebookDocument, + parent: Variable | undefined, + kind: NotebookVariablesRequestKind, + start: number, + token: CancellationToken + ): AsyncIterable { + const kernel = this.kernelProvider.get(notebook); + const results = this.provideVariables(notebook, parent, kind, start, token); + for await (const result of results) { + if (kernel && kernel.status !== 'dead' && kernel.status !== 'terminating') { + const cacheKey = this._getVariableSummaryCacheKey(notebook.uri.toString(), result.variable); + const executionCount = this.kernelProvider.getKernelExecution(kernel).executionCount; + let summary = this.variableSummaryCache.getResults(executionCount, cacheKey); + + if (summary == undefined) { + summary = await this.variables.getVariableValueSummary( + { + name: result.variable.name, + value: result.variable.value, + supportsDataExplorer: false, + type: result.variable.type ?? '', + size: 0, + count: 0, + shape: '', + truncated: true + }, + kernel, + token + ); + + this.variableSummaryCache.setResults(executionCount, cacheKey, summary ?? null); + } + + yield { + hasNamedChildren: result.hasNamedChildren, + indexedChildrenCount: result.indexedChildrenCount, + variable: { + name: result.variable.name, + value: result.variable.value, + expression: result.variable.expression, + type: result.variable.type, + language: result.variable.language, + summary: summary + } + }; + } + } + } + private createVariableResult(result: IVariableDescription, kernel: IKernel): VariablesResult { const indexedChildrenCount = result.count ?? 
0; const hasNamedChildren = !!result.hasNamedChildren; diff --git a/src/kernels/variables/jupyterVariables.ts b/src/kernels/variables/jupyterVariables.ts index 328c0bb475b..08644f4a6f7 100644 --- a/src/kernels/variables/jupyterVariables.ts +++ b/src/kernels/variables/jupyterVariables.ts @@ -75,6 +75,14 @@ export class JupyterVariables implements IJupyterVariables { return this.variableHandler.getDataFrameInfo(targetVariable, kernel, sliceExpression, isRefresh); } + public async getVariableValueSummary( + targetVariable: IJupyterVariable, + kernel?: IKernel, + cancelToken?: CancellationToken + ) { + return this.variableHandler.getVariableValueSummary(targetVariable, kernel, cancelToken); + } + public async getDataFrameRows( targetVariable: IJupyterVariable, start: number, diff --git a/src/kernels/variables/kernelVariables.ts b/src/kernels/variables/kernelVariables.ts index 480416796f9..4a432fd9e97 100644 --- a/src/kernels/variables/kernelVariables.ts +++ b/src/kernels/variables/kernelVariables.ts @@ -129,6 +129,25 @@ export class KernelVariables implements IJupyterVariables { } } + public async getVariableValueSummary( + targetVariable: IJupyterVariable, + kernel?: IKernel, + cancelToken?: CancellationToken + ): Promise { + if (!kernel) { + return; + } + + const languageId = getKernelConnectionLanguage(kernel?.kernelConnectionMetadata) || PYTHON_LANGUAGE; + const variableRequester = this.variableRequesters.get(languageId); + + if (variableRequester) { + return variableRequester.getVariableValueSummary(targetVariable, kernel, cancelToken); + } + + return; + } + public async getDataFrameInfo( targetVariable: IJupyterVariable, kernel?: IKernel, diff --git a/src/kernels/variables/pythonVariableRequester.ts b/src/kernels/variables/pythonVariableRequester.ts index f153ef9e56e..0f3784954f5 100644 --- a/src/kernels/variables/pythonVariableRequester.ts +++ b/src/kernels/variables/pythonVariableRequester.ts @@ -148,6 +148,31 @@ export class PythonVariablesRequester implements 
IKernelVariableRequester { return result; } + public async getVariableValueSummary( + targetVariable: IJupyterVariable, + kernel: IKernel, + _cancelToken?: CancellationToken + ) { + const { code, cleanupCode, initializeCode } = + await this.varScriptGenerator.generateCodeToGetVariableValueSummary({ variableName: targetVariable.name }); + const results = await safeExecuteSilently( + kernel, + { code, cleanupCode, initializeCode }, + { + traceErrors: true, + traceErrorsMessage: 'Failure in execute_request for getDataFrameInfo', + telemetryName: Telemetry.PythonVariableFetchingCodeFailure + } + ); + + try { + const text = this.extractJupyterResultText(results); + return text; + } catch (_ex) { + return undefined; + } + } + public async getAllVariableDiscriptions( kernel: IKernel, parent: IVariableDescription | undefined, diff --git a/src/kernels/variables/types.ts b/src/kernels/variables/types.ts index cd1c98d7e99..cb2c958995c 100644 --- a/src/kernels/variables/types.ts +++ b/src/kernels/variables/types.ts @@ -41,6 +41,11 @@ export interface IJupyterVariables { kernel?: IKernel, cancelToken?: CancellationToken ): Promise; + getVariableValueSummary( + variable: IJupyterVariable, + kernel?: IKernel, + cancelToken?: CancellationToken + ): Promise; getDataFrameInfo( targetVariable: IJupyterVariable, kernel?: IKernel, @@ -99,6 +104,12 @@ export interface IVariableDescription extends Variable { getChildren?: (start: number, token: CancellationToken) => Promise; } +export interface IRichVariableResult { + variable: Variable & { summary?: string }; + hasNamedChildren: boolean; + indexedChildrenCount: number; +} + export const IKernelVariableRequester = Symbol('IKernelVariableRequester'); export interface IKernelVariableRequester { @@ -125,5 +136,10 @@ export interface IKernelVariableRequester { cancelToken: CancellationToken | undefined, matchingVariable: IJupyterVariable | undefined ): Promise<{ [attributeName: string]: string }>; + getVariableValueSummary( + targetVariable: 
IJupyterVariable, + kernel: IKernel, + token?: CancellationToken + ): Promise; getDataFrameInfo(targetVariable: IJupyterVariable, kernel: IKernel, expression: string): Promise; } diff --git a/src/kernels/variables/variableResultCache.ts b/src/kernels/variables/variableResultCache.ts index ed43652cac6..d696a48396a 100644 --- a/src/kernels/variables/variableResultCache.ts +++ b/src/kernels/variables/variableResultCache.ts @@ -1,23 +1,13 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -import { Variable, VariablesResult } from 'vscode'; -import { IVariableDescription } from './types'; +import { VariablesResult } from 'vscode'; -export class VariableResultCache { - private cache = new Map(); +export class VariableResultCacheBase { + private cache = new Map(); private executionCount = 0; - getCacheKey(notebookUri: string, parent: Variable | undefined, start: number): string { - let parentKey = ''; - const parentDescription = parent as IVariableDescription; - if (parentDescription) { - parentKey = `${parentDescription.name}.${parentDescription.propertyChain.join('.')}[[${start}`; - } - return `${notebookUri}:${parentKey}`; - } - - getResults(executionCount: number, cacheKey: string): VariablesResult[] | undefined { + getResults(executionCount: number, cacheKey: string): T | undefined { if (this.executionCount !== executionCount) { this.cache.clear(); this.executionCount = executionCount; @@ -26,7 +16,7 @@ export class VariableResultCache { return this.cache.get(cacheKey); } - setResults(executionCount: number, cacheKey: string, results: VariablesResult[]) { + setResults(executionCount: number, cacheKey: string, results: T) { if (this.executionCount < executionCount) { this.cache.clear(); this.executionCount = executionCount; @@ -38,3 +28,6 @@ export class VariableResultCache { this.cache.set(cacheKey, results); } } + +export const VariableResultCache = VariableResultCacheBase; +export const VariableSummaryCache = VariableResultCacheBase; 
diff --git a/src/notebooks/debugger/debuggerVariables.ts b/src/notebooks/debugger/debuggerVariables.ts index e94b7136823..eece4053c07 100644 --- a/src/notebooks/debugger/debuggerVariables.ts +++ b/src/notebooks/debugger/debuggerVariables.ts @@ -138,6 +138,10 @@ export class DebuggerVariables } } + public async getVariableValueSummary(_targetVariable: IJupyterVariable) { + return undefined; + } + public async getDataFrameInfo( targetVariable: IJupyterVariable, kernel?: IKernel, diff --git a/src/platform/common/types.ts b/src/platform/common/types.ts index f4f3ed66374..2eb4769cf34 100644 --- a/src/platform/common/types.ts +++ b/src/platform/common/types.ts @@ -309,6 +309,7 @@ export interface IVariableScriptGenerator { parent: { root: string; propertyChain: (string | number)[] } | undefined; startIndex: number; }): Promise; + generateCodeToGetVariableValueSummary(options: { variableName: string }): Promise; } export const IDataFrameScriptGenerator = Symbol('IDataFrameScriptGenerator'); export interface IDataFrameScriptGenerator { diff --git a/src/platform/interpreter/variableScriptGenerator.ts b/src/platform/interpreter/variableScriptGenerator.ts index f6ded5ce2bd..a07a137df27 100644 --- a/src/platform/interpreter/variableScriptGenerator.ts +++ b/src/platform/interpreter/variableScriptGenerator.ts @@ -120,6 +120,14 @@ export class VariableScriptGenerator implements IVariableScriptGenerator { }; } } + async generateCodeToGetVariableValueSummary(options: { variableName: string }) { + const initializeCode = await this.getContentsOfScript(); + const isDebugging = 'False'; + const code = `${VariableFunc}("summary", ${isDebugging}, ${options.variableName})`; + return { + code: `${initializeCode}\n\n${code}\n\n${cleanupCode}` + }; + } /** * Script content is static, hence read the contents once. 
*/ diff --git a/src/standalone/chat/extesnion.ts b/src/standalone/chat/extesnion.ts index 760e0d6ef0c..b33222933fd 100644 --- a/src/standalone/chat/extesnion.ts +++ b/src/standalone/chat/extesnion.ts @@ -7,6 +7,7 @@ import { IKernel, IKernelProvider } from '../../kernels/types'; import { execCodeInBackgroundThread } from '../api/kernels/backgroundExecution'; import { ServiceContainer } from '../../platform/ioc/container'; import { IControllerRegistration } from '../../notebooks/controllers/types'; +import { JupyterVariablesProvider } from '../../kernels/variables/JupyterVariablesProvider'; export async function activate(context: vscode.ExtensionContext): Promise { context.subscriptions.push( @@ -54,14 +55,13 @@ export async function activate(context: vscode.ExtensionContext): Promise return []; } - const variablesProvider = controller.controller.variableProvider; - + const variablesProvider = controller.controller.variableProvider as JupyterVariablesProvider; if (!variablesProvider) { return []; } const token = new vscode.CancellationTokenSource().token; - const variables = variablesProvider.provideVariables( + const variables = variablesProvider.provideVariablesWithSummarization( document, undefined, vscode.NotebookVariablesRequestKind.Named,