Skip to content

Commit b74dc34

Browse files
melissa-barca, wesm, sharon-wang
authored
Assistant: Initial pass at implementing a data summary tool for Python (#8208)
First pass at #7114. Provides Assistant with a `getDataSummary` tool (registered as `getTableSummary` in this diff), currently only implemented for Python, that provides a JSON-structured summary of a data object by using the Positron API to communicate with the Variables Comm. I updated the variables comm's Python backend to reuse existing functionality from the data explorer. I used the `inspectVariables` tool as a guide for retrieving info from the variables comm. --------- Signed-off-by: Wes McKinney <wesm@apache.org> Co-authored-by: Wes McKinney <wes@posit.co> Co-authored-by: sharon <sharon-wang@users.noreply.github.com>
1 parent 5b5523a commit b74dc34

File tree

19 files changed

+592
-74
lines changed

19 files changed

+592
-74
lines changed

extensions/positron-assistant/package.json

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,39 @@
360360
"positron-assistant"
361361
]
362362
},
363+
{
364+
"name": "getTableSummary",
365+
"displayName": "Get Table Summary",
366+
"modelDescription": "Get structured information about table variables in the current session.",
367+
"inputSchema": {
368+
"type": "object",
369+
"properties": {
370+
"sessionIdentifier": {
371+
"type": "string",
372+
"description": "The identifier of the session that contains the tables."
373+
},
374+
"accessKeys": {
375+
"type": "array",
376+
"description": "An array of table variables to summarize.",
377+
"items": {
378+
"type": "array",
379+
"description": "A list of access keys that identify a variable by specifying its path.",
380+
"items": {
381+
"type": "string",
382+
"description": "An access key that uniquely identifies a variable among its siblings."
383+
}
384+
}
385+
}
386+
},
387+
"required": [
388+
"sessionIdentifier",
389+
"accessKeys"
390+
]
391+
},
392+
"tags": [
393+
"positron-assistant"
394+
]
395+
},
363396
{
364397
"name": "getProjectTree",
365398
"displayName": "Get Project Tree",

extensions/positron-assistant/src/md/prompts/chat/agent.md

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,29 @@ If the user asks you _how_ to do something, or asks for code rather than
2020
results, generate the code and return it directly without trying to execute it.
2121
</communication>
2222

23+
<data-querying>
24+
25+
**Data Object Information Workflow:**
26+
27+
When the user asks questions that require detailed information about tabular
28+
data objects (DataFrames, arrays, matrices, etc.), use the `getTableSummary`
29+
tool to retrieve structured information such as data summaries and statistics.
30+
Currently, this tool is only available for Python, so in R sessions you will need
31+
to execute code to query in-memory data.
32+
33+
To use the tool effectively:
34+
35+
1. First ensure you have the correct `sessionIdentifier` from the user context
36+
2. Provide the `accessKeys` array with the path to the specific data objects
37+
- Each access key is an array of strings representing the path to the variable
38+
- If the user references a variable by name, determine the access key from context or previous tool results
39+
3. Do not call this tool when:
40+
- The variables do not appear in the user context
41+
- There is no active session
42+
- The user only wants to see the structure/children of objects (use `inspectVariables` instead)
43+
44+
</data-querying>
45+
2346
<package-management>
2447
You adhere to the following workflow when dealing with package management:
2548

extensions/positron-assistant/src/participants.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,11 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici
283283
// Only include the documentCreate tool in the chat pane in edit or agent mode.
284284
case PositronAssistantToolName.DocumentCreate:
285285
return inChatPane && (isEditMode || isAgentMode);
286+
// Only include the getTableSummary tool for Python sessions until supported in R
287+
case PositronAssistantToolName.GetTableSummary:
288+
// TODO: Remove this restriction when the tool is supported in R https://github.com/posit-dev/positron/issues/8343
289+
// The logic above with TOOL_TAG_REQUIRES_ACTIVE_SESSION will handle checking for active sessions once this is removed.
290+
return activeSessions.has('python');
286291
// Otherwise, include the tool if it is tagged for use with Positron Assistant.
287292
// Allow all tools in Agent mode.
288293
default:

extensions/positron-assistant/src/tools.ts

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -299,6 +299,56 @@ export function registerAssistantTools(
299299

300300
context.subscriptions.push(inspectVariablesTool);
301301

302+
const getTableSummaryTool = vscode.lm.registerTool<{ sessionIdentifier: string; accessKeys: Array<Array<string>> }>(PositronAssistantToolName.GetTableSummary, {
303+
/**
304+
* Called to get summary information for one or more tabular datasets in the current session.
305+
* @param options The options for the tool invocation.
306+
* @param token The cancellation token.
307+
* @returns A vscode.LanguageModelToolResult containing the data summary.
308+
*/
309+
invoke: async (options, token) => {
310+
311+
// If no session identifier is provided, return an empty result (`[[]]`).
312+
if (!options.input.sessionIdentifier || options.input.sessionIdentifier === 'undefined') {
313+
return new vscode.LanguageModelToolResult([
314+
new vscode.LanguageModelTextPart('[[]]')
315+
]);
316+
}
317+
318+
// Look up the requested session; the tool is temporarily only enabled for Python sessions.
319+
let session: positron.LanguageRuntimeSession | undefined;
320+
const sessions = await positron.runtime.getActiveSessions();
321+
if (sessions && sessions.length > 0) {
322+
session = sessions.find(
323+
(session) => session.metadata.sessionId === options.input.sessionIdentifier,
324+
);
325+
}
326+
if (!session) {
327+
return new vscode.LanguageModelToolResult([
328+
new vscode.LanguageModelTextPart('[[]]')
329+
]);
330+
}
331+
332+
if (session.runtimeMetadata.languageId !== 'python') {
333+
return new vscode.LanguageModelToolResult([
334+
new vscode.LanguageModelTextPart('[[]]')
335+
]);
336+
}
337+
338+
// Call the Positron API to get the session variable data summaries
339+
const result = await positron.runtime.querySessionTables(
340+
options.input.sessionIdentifier,
341+
options.input.accessKeys,
342+
['summary_stats']);
343+
344+
// Return the result as a JSON string to the model
345+
return new vscode.LanguageModelToolResult([
346+
new vscode.LanguageModelTextPart(JSON.stringify(result))
347+
]);
348+
}
349+
});
350+
context.subscriptions.push(getTableSummaryTool);
351+
302352
const installPythonPackageTool = vscode.lm.registerTool<{
303353
packages: string[];
304354
}>(PositronAssistantToolName.InstallPythonPackage, {

extensions/positron-assistant/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ export enum PositronAssistantToolName {
77
DocumentEdit = 'documentEdit',
88
EditFile = 'positron_editFile_internal',
99
ExecuteCode = 'executeCode',
10+
GetTableSummary = 'getTableSummary',
1011
GetPlot = 'getPlot',
1112
InstallPythonPackage = 'installPythonPackage',
1213
InspectVariables = 'inspectVariables',

0 commit comments

Comments
 (0)