Skip to content

data explorer: "Copy as Code" comms #8536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
02a2c28
make action and button
isabelizimm Jul 2, 2025
dc9ef57
Merge branch 'main' into export-to-code
isabelizimm Jul 8, 2025
6f502e0
plumb through comms
isabelizimm Jul 9, 2025
dcf380b
export -> copy
isabelizimm Jul 10, 2025
32eb2c0
add modal on filter bar
isabelizimm Jul 11, 2025
f9a2c01
ui updates
isabelizimm Jul 14, 2025
2b9f962
make calls from modal
isabelizimm Jul 15, 2025
5083889
clean up comms
isabelizimm Jul 15, 2025
5be588b
clean up modal
isabelizimm Jul 15, 2025
d59fd66
linting for python
isabelizimm Jul 15, 2025
cc2d069
nit to modal
isabelizimm Jul 15, 2025
bae6ced
Merge branch 'export-as-code-comms' into copy-as-code-comms
isabelizimm Jul 15, 2025
4d624bc
updates from copilot
isabelizimm Jul 15, 2025
ac12c66
nit
isabelizimm Jul 15, 2025
0d6e7d9
cleanup
isabelizimm Jul 15, 2025
c33df7c
updates from review
isabelizimm Jul 16, 2025
45cd402
guess syntax immediately
isabelizimm Jul 16, 2025
1adfc02
reorder guessing
isabelizimm Jul 16, 2025
7a0d40b
no undefined for guessing
isabelizimm Jul 16, 2025
9cbe02a
python lint
isabelizimm Jul 17, 2025
51893f0
Update positron/comms/data_explorer-backend-openrpc.json
isabelizimm Jul 17, 2025
9517202
rename, early exits
isabelizimm Jul 17, 2025
36bf5f5
use supported feature flag for syntaxes
isabelizimm Jul 17, 2025
da70e52
refactor
isabelizimm Jul 17, 2025
b6bbe5c
clean up comm names
isabelizimm Jul 17, 2025
ccd15c4
updates from review
isabelizimm Jul 18, 2025
0cc6f14
tweaks to runtime
isabelizimm Jul 18, 2025
0c0ffac
Merge branch 'main' into copy-as-code-comms
isabelizimm Jul 18, 2025
a85ef9f
lint post merge from main
isabelizimm Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .data_explorer_comm import (
ArraySelection,
BackendState,
CodeSyntaxOptions,
ColumnDisplayType,
ColumnFilter,
ColumnFilterType,
Expand All @@ -53,6 +54,7 @@
DataSelectionSingleCell,
ExportDataSelectionFeatures,
ExportDataSelectionRequest,
ExportedCode,
ExportedData,
ExportFormat,
FilterBetween,
Expand All @@ -63,6 +65,7 @@
FilterSetMembership,
FilterTextSearch,
FormatOptions,
GetCodeSyntaxesRequest,
GetColumnProfilesFeatures,
GetColumnProfilesRequest,
GetDataValuesRequest,
Expand Down Expand Up @@ -94,6 +97,7 @@
TableSelectionKind,
TableShape,
TextSearchType,
TranslateToCodeRequest,
)
from .positron_comm import CommMessage, PositronComm
from .utils import BackgroundJobQueue, guid
Expand Down Expand Up @@ -282,6 +286,12 @@ def get_schema(self, request: GetSchemaRequest):
def _get_single_column_schema(self, column_index: int) -> ColumnSchema:
    """Produce the ``ColumnSchema`` for the column at ``column_index``.

    This version carries no implementation and always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

def get_code_syntaxes(self, request: GetCodeSyntaxesRequest):
    """Handle a ``get_code_syntaxes`` request.

    This version carries no implementation and always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

def translate_to_code(self, request: TranslateToCodeRequest):
    """Handle a ``translate_to_code`` request.

    This version carries no implementation and always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError()

def search_schema(self, request: SearchSchemaRequest):
filters = request.params.filters
start_index = request.params.start_index
Expand Down Expand Up @@ -1351,6 +1361,14 @@ def schema_getter(column_name, column_index):

return schema_updated, new_state

def get_code_syntaxes(self, request: GetCodeSyntaxesRequest):  # noqa: ARG002
    """Report which code syntaxes can be produced when exporting data.

    The request argument is not consulted; the answer is always the single
    syntax ``"pandas"``, returned as the dict form of ``CodeSyntaxOptions``.
    """
    supported = CodeSyntaxOptions(code_syntaxes=["pandas"])
    return supported.dict()

def translate_to_code(self, request: TranslateToCodeRequest):  # noqa: ARG002
    """Turn the current data view (filters and sorts) into a code snippet.

    The request argument is not consulted yet: a fixed placeholder snippet
    is returned as the dict form of ``ExportedCode``.
    """
    placeholder = "import pandas as pd\n\n# TODO: Implement export to code"
    exported = ExportedCode(data=placeholder)
    return exported.dict()

@classmethod
def _construct_schema(
cls, column, column_name, column_index: int, state: DataExplorerState
Expand Down Expand Up @@ -2281,6 +2299,14 @@ def schema_getter(column_name, column_index):

return schema_updated, new_state

def get_code_syntaxes(self, request: GetCodeSyntaxesRequest):  # noqa: ARG002
    """Report which code syntaxes can be produced when exporting data.

    The request argument is not consulted; the answer is always the single
    syntax ``"polars"``, returned as the dict form of ``CodeSyntaxOptions``.
    """
    supported = CodeSyntaxOptions(code_syntaxes=["polars"])
    return supported.dict()

def translate_to_code(self, request: TranslateToCodeRequest):  # noqa: ARG002
    """Turn the current data view (filters and sorts) into a code snippet.

    The request argument is not consulted yet: a fixed placeholder snippet
    is returned as the dict form of ``ExportedCode``.
    """
    placeholder = "import polars as pl\n\n# TODO: Implement export to code"
    exported = ExportedCode(data=placeholder)
    return exported.dict()

def _get_single_column_schema(self, column_index: int):
if self.state.schema_cache:
return self.state.schema_cache[column_index]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,27 @@ class ExportedData(BaseModel):
)


class ExportedCode(BaseModel):
"""
Resulting code
"""

# Result payload for a code-export request: the code travels as one string.
# The field is not required; it falls back to None when omitted.
data: Optional[StrictStr] = Field(
default=None,
description="Exported code as a string suitable for copy and paste",
)


class CodeSyntaxOptions(BaseModel):
"""
Code syntaxes available for export
"""

# Declared without a default, so a list of syntax names must be supplied
# (an empty list is still a valid value).
code_syntaxes: List[StrictStr] = Field(
description="Available code syntaxes supported for export",
)


class FilterResult(BaseModel):
"""
The result of applying filters to a table
Expand Down Expand Up @@ -1208,6 +1229,12 @@ class DataExplorerBackendRequest(str, enum.Enum):
# Export data selection as a string in different formats
ExportDataSelection = "export_data_selection"

# Translates the current data view into a code snippet.
TranslateToCode = "translate_to_code"

# Get code syntaxes supported for code translation
GetCodeSyntaxes = "get_code_syntaxes"

# Set column filters to select subset of table columns
SetColumnFilters = "set_column_filters"

Expand Down Expand Up @@ -1422,6 +1449,65 @@ class ExportDataSelectionRequest(BaseModel):
)


class TranslateToCodeParams(BaseModel):
"""
Translate filters and sort keys as code in different syntaxes like
pandas, polars, data.table, dplyr
"""

# All four fields below are declared without defaults. The three list
# fields are described as "zero or more", so presumably an empty list
# means "nothing to apply" -- confirm against callers.
column_filters: List[ColumnFilter] = Field(
description="Zero or more column filters to apply",
)

row_filters: List[RowFilter] = Field(
description="Zero or more row filters to apply",
)

sort_keys: List[ColumnSortKey] = Field(
description="Zero or more sort keys to apply",
)

# Free-form string naming the target syntax; no enum restricts it here.
code_syntax: StrictStr = Field(
description="The code syntax to use for translation",
)


class TranslateToCodeRequest(BaseModel):
"""
Translate filters and sort keys as code in different syntaxes like
pandas, polars, data.table, dplyr
"""

# JSON-RPC style envelope: the payload lives in `params`, and `method`
# pins this request type to the single TranslateToCode enum member.
params: TranslateToCodeParams = Field(
description="Parameters to the TranslateToCode method",
)

method: Literal[DataExplorerBackendRequest.TranslateToCode] = Field(
description="The JSON-RPC method name (translate_to_code)",
)

# Only this field has a default ("2.0").
jsonrpc: str = Field(
default="2.0",
description="The JSON-RPC version specifier",
)


class GetCodeSyntaxesRequest(BaseModel):
"""
Get all available code syntaxes supported for translation for a data
view
"""

# This request declares no `params` field: only the method name and the
# JSON-RPC version are carried.
method: Literal[DataExplorerBackendRequest.GetCodeSyntaxes] = Field(
description="The JSON-RPC method name (get_code_syntaxes)",
)

# Only this field has a default ("2.0").
jsonrpc: str = Field(
default="2.0",
description="The JSON-RPC version specifier",
)


class SetColumnFiltersParams(BaseModel):
"""
Set or clear column filters on table, replacing any previous filters
Expand Down Expand Up @@ -1575,6 +1661,8 @@ class DataExplorerBackendMessageContent(BaseModel):
GetDataValuesRequest,
GetRowLabelsRequest,
ExportDataSelectionRequest,
TranslateToCodeRequest,
GetCodeSyntaxesRequest,
SetColumnFiltersRequest,
SetRowFiltersRequest,
SetSortColumnsRequest,
Expand Down Expand Up @@ -1623,6 +1711,10 @@ class ReturnColumnProfilesParams(BaseModel):

ExportedData.update_forward_refs()

ExportedCode.update_forward_refs()

CodeSyntaxOptions.update_forward_refs()

FilterResult.update_forward_refs()

BackendState.update_forward_refs()
Expand Down Expand Up @@ -1741,6 +1833,12 @@ class ReturnColumnProfilesParams(BaseModel):

ExportDataSelectionRequest.update_forward_refs()

TranslateToCodeParams.update_forward_refs()

TranslateToCodeRequest.update_forward_refs()

GetCodeSyntaxesRequest.update_forward_refs()

SetColumnFiltersParams.update_forward_refs()

SetColumnFiltersRequest.update_forward_refs()
Expand Down
99 changes: 98 additions & 1 deletion positron/comms/data_explorer-backend-openrpc.json
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,98 @@
}
}
},
{
"name": "translate_to_code",
"summary": "Translates the current data view into a code snippet.",
"description": "Translate filters and sort keys as code in different syntaxes like pandas, polars, data.table, dplyr",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think "translate" is not a great verb here, because it could have so many meanings. Translate from English to another language? Translate from R to Python? I realize there's a lot of context to help someone clarify this, but I suspect it would be even better to use a different verb.

For example, I think "convert" or "generate" are better.

This choice obviously affects a lot of locations (every instance of "translate", "translation", "translated", etc.), so I won't repeat the comment elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had started with generate and moved away from it since I felt it was too AI-y, but I'm on board with convert instead 💯

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hear you re: "generate" and "AI-y" in terms of the UI. But internally, I think "generate" is a very viable option. I'm thinking about how nice "generated code" feels versus "converted code".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case, both "generate" and "convert" might be handy for actual names and then also for docstrings.

"params": [
{
"name": "column_filters",
"description": "Zero or more column filters to apply",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/components/schemas/column_filter"
}
}
},
{
"name": "row_filters",
"description": "Zero or more row filters to apply",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/components/schemas/row_filter"
}
}
},
{
"name": "sort_keys",
"description": "Zero or more sort keys to apply",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/components/schemas/column_sort_key"
}
}
},
{
"name": "code_syntax",
"description": "The code syntax to use for translation",
"required": true,
"schema": {
"$ref": "#/components/schemas/code_syntax"
}
}
],
"result": {
"schema": {
"name": "exported_code",
"type": "object",
"description": "Resulting code",
"required": [
"data"
],
"properties": {
"data": {
"type": "string",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How sure are we that we want a string? As opposed to an array of strings, i.e. one per line. I can't say exactly if/why this matters, but somehow sending lines of code back to the front end feels more proper than a single collapsed string.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I like this idea! It does feel more proper, somehow. An array would probably be easier for formatting anyway; I'll move over to this structure. 👍

"description": "Exported code as a string suitable for copy and paste"
}
}
}
}
},
{
"name": "get_code_syntaxes",
"summary": "Get code syntaxes supported for code translation",
"description": "Get all available code syntaxes supported for translation for a data view",
Copy link
Member

@jennybc jennybc Jul 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like there's some notion that the backend is expected to reply using knowledge of the type of data being viewed. In the R world, i.e. that the backend will know the class(es) of the object, in this sense:

class(palmerpenguins::penguins)
#> [1] "tbl_df"     "tbl"        "data.frame"

which would affect which syntaxes are supported or which syntax is the default.

I don't see this information being sent over. Does the backend obtain that info another way? I do understand that the object in question must already exist in the runtime and, therefore, the backend already knows (or can know) this. And I even have some sense that all of this is unfolding inside a data explorer instance and therefore the target data object is implicitly available. I just don't understand the information flow yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The table lives on the RDataExplorer, which is created when the data explorer is opened. We should be able to use that table to get information like class without explicitly passing it over the comm.

"params": [],
"result": {
"schema": {
"name": "code_syntax_options",
"type": "object",
"description": "Code syntaxes available for export",
"required": [
"code_syntaxes"
],
"properties": {
"code_syntaxes": {
"type": "array",
"description": "Available code syntaxes supported for export",
"items": {
"type": "string"
}
}
}
}
}
},
{
"name": "set_column_filters",
"summary": "Set column filters to select subset of table columns",
Expand Down Expand Up @@ -1293,6 +1385,10 @@
}
}
},
"code_syntax": {
"type": "string",
"description": "The syntax for translated code"
},
"supported_features": {
"type": "object",
"description": "For each field, returns flags indicating supported features",
Expand All @@ -1302,7 +1398,8 @@
"set_row_filters",
"get_column_profiles",
"set_sort_columns",
"export_data_selection"
"export_data_selection",
"export_as_code"
],
"properties": {
"search_schema": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { Emitter, Event } from '../../../../base/common/event.js';
import { Disposable } from '../../../../base/common/lifecycle.js';
import { URI } from '../../../../base/common/uri.js';
import { generateUuid } from '../../../../base/common/uuid.js';
import { ArraySelection, BackendState, ColumnFilter, ColumnProfileRequest, ColumnProfileResult, ColumnSchema, ColumnSelection, ColumnSortKey, DataExplorerFrontendEvent, DataUpdateEvent, ExportedData, ExportFormat, FilterResult, FormatOptions, ReturnColumnProfilesEvent, RowFilter, SchemaUpdateEvent, SupportedFeatures, SupportStatus, TableData, TableRowLabels, TableSchema, TableSelection } from './positronDataExplorerComm.js';
import { ArraySelection, BackendState, CodeSyntaxOptions, ColumnFilter, ColumnProfileRequest, ColumnProfileResult, ColumnSchema, ColumnSelection, ColumnSortKey, DataExplorerFrontendEvent, DataUpdateEvent, ExportedCode, ExportedData, ExportFormat, FilterResult, FormatOptions, ReturnColumnProfilesEvent, RowFilter, SchemaUpdateEvent, SupportedFeatures, SupportStatus, TableData, TableRowLabels, TableSchema, TableSelection } from './positronDataExplorerComm.js';

/**
* TableSchemaSearchResult interface. This is here temporarily until searching the table schema
Expand Down Expand Up @@ -66,6 +66,8 @@ export interface IDataExplorerBackendClient extends Disposable {
getDataValues(columns: Array<ColumnSelection>, formatOptions: FormatOptions): Promise<TableData>;
getRowLabels(selection: ArraySelection, formatOptions: FormatOptions): Promise<TableRowLabels>;
exportDataSelection(selection: TableSelection, format: ExportFormat): Promise<ExportedData>;
getCodeSyntaxes(): Promise<CodeSyntaxOptions>;
translateToCode(columnFilters: Array<ColumnFilter>, rowFilters: Array<RowFilter>, sortKeys: Array<ColumnSortKey>, exportOptions: string): Promise<ExportedCode>;
setColumnFilters(filters: Array<ColumnFilter>): Promise<void>;
setRowFilters(filters: Array<RowFilter>): Promise<FilterResult>;
setSortColumns(sortKeys: Array<ColumnSortKey>): Promise<void>;
Expand Down Expand Up @@ -496,6 +498,32 @@ export class DataExplorerClientInstance extends Disposable {
}
}


/**
 * Asks the backend to express the current filters and sort keys as code.
 * @param desiredSyntax The code syntax to request from the backend.
 * @returns A promise that resolves to the exported code.
 */
async translateToCode(desiredSyntax: string): Promise<ExportedCode> {
	// Read the active filters and sort keys from the backend state.
	const {
		column_filters: columnFilters,
		row_filters: rowFilters,
		sort_keys: sortKeys
	} = await this.getBackendState();

	// Second callback handed to runBackendTask; presumably the value used
	// when the backend task cannot run -- confirm against runBackendTask.
	const emptyCode = (): ExportedCode => ({ 'data': '' });

	return this.runBackendTask(
		() => this._backendClient.translateToCode(columnFilters, rowFilters, sortKeys, desiredSyntax),
		emptyCode
	);
}

/**
 * Asks the backend which code syntaxes it can export to.
 * @returns A promise that resolves to the available code syntaxes.
 */
async getCodeSyntaxes(): Promise<CodeSyntaxOptions> {
	const requestSyntaxes = () => this._backendClient.getCodeSyntaxes();

	// Second callback handed to runBackendTask; presumably the value used
	// when the backend task cannot run -- confirm against runBackendTask.
	const noSyntaxes = (): CodeSyntaxOptions => ({ code_syntaxes: [] });

	return this.runBackendTask(requestSyntaxes, noSyntaxes);
}

//#endregion Public Methods

//#region Private Methods
Expand Down
Loading
Loading