From 8c7eff1533d83962a1102b9e23fade9a1ce7f898 Mon Sep 17 00:00:00 2001 From: Eugen Neufeld Date: Sun, 6 Apr 2025 21:31:01 +0200 Subject: [PATCH 1/5] Add initial support for images in the ai chat Initial implementation of #15407 --- package-lock.json | 1 + .../src/node/anthropic-language-model.ts | 7 +- .../ai-chat-ui/src/browser/ImagePreview.tsx | 53 ++++++ .../src/browser/chat-input-widget.tsx | 159 +++++++++++++++--- .../chat-tree-view/chat-view-tree-widget.tsx | 24 ++- .../src/browser/chat-view-contribution.ts | 2 +- .../src/browser/chat-view-widget.tsx | 19 ++- .../ai-chat-ui/src/browser/style/index.css | 70 ++++++++ packages/ai-chat/src/common/chat-agents.ts | 21 ++- packages/ai-chat/src/common/chat-model.ts | 16 +- .../ai-chat/src/common/chat-request-parser.ts | 116 ++++++------- packages/ai-chat/src/common/chat-service.ts | 1 + packages/ai-core/src/common/language-model.ts | 22 ++- .../src/node/google-language-model.ts | 5 +- .../src/node/openai-language-model.ts | 17 +- 15 files changed, 428 insertions(+), 105 deletions(-) create mode 100644 packages/ai-chat-ui/src/browser/ImagePreview.tsx diff --git a/package-lock.json b/package-lock.json index de15174675844..ee7011c3d9714 100644 --- a/package-lock.json +++ b/package-lock.json @@ -263,6 +263,7 @@ "dev-packages/ffmpeg": { "name": "@theia/ffmpeg", "version": "1.61.0", + "hasInstallScript": true, "license": "EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0", "dependencies": { "@electron/get": "^2.0.0", diff --git a/packages/ai-anthropic/src/node/anthropic-language-model.ts b/packages/ai-anthropic/src/node/anthropic-language-model.ts index 3d697690d0142..b967488b55398 100644 --- a/packages/ai-anthropic/src/node/anthropic-language-model.ts +++ b/packages/ai-anthropic/src/node/anthropic-language-model.ts @@ -24,7 +24,8 @@ import { LanguageModelTextResponse, TokenUsageService, TokenUsageParams, - UserRequest + UserRequest, + LLMImageData } from '@theia/ai-core'; import { CancellationToken, isArray } from '@theia/core'; import { Anthropic } from '@anthropic-ai/sdk'; @@ -48,6 +49,10 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont return [{ id: message.id, input: message.input, name: message.name, type: 'tool_use' }]; } else if (LanguageModelMessage.isToolResultMessage(message)) { return [{ type: 'tool_result', tool_use_id: message.tool_use_id }]; + } else if (LanguageModelMessage.isImageMessage(message)) { + if (LLMImageData.isBase64ImageData(message.image)) { + return [{ type: 'image', source: { type: 'base64', media_type: message.image.mediaType, data: message.image.imageData } }]; + } } throw new Error(`Unknown message type:'${JSON.stringify(message)}'`); }; diff --git a/packages/ai-chat-ui/src/browser/ImagePreview.tsx b/packages/ai-chat-ui/src/browser/ImagePreview.tsx new file mode 100644 index 0000000000000..53814e84087e5 --- /dev/null +++ b/packages/ai-chat-ui/src/browser/ImagePreview.tsx @@ -0,0 +1,53 @@ +// ***************************************************************************** +// Copyright (C) 2025 EclipseSource GmbH. +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0. +// +// This Source Code may also be made available under the following Secondary +// Licenses when the conditions for such availability set forth in the Eclipse +// Public License v. 
2.0 are satisfied: GNU General Public License, version 2 +// with the GNU Classpath Exception which is available at +// https://www.gnu.org/software/classpath/license.html. +// +// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 +// ***************************************************************************** +import { nls } from '@theia/core'; +import * as React from '@theia/core/shared/react'; + +// Interface for pasted image data +export interface PastedImage { + id: string; + data: string; + name: string; + type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; +} + +// Image Preview Component +interface ImagePreviewProps { + images: PastedImage[]; + onRemove: (id: string) => void; +} +export const ImagePreview: React.FC = ({ images, onRemove }) => { + if (images.length === 0) { return undefined; } + + return ( +
+        <div className='theia-ChatInput-ImagePreview'>
+            {images.map(img => (
+                <div key={img.id} className='theia-ChatInput-ImagePreview-Item'>
+                    <img src={`data:${img.type};base64,${img.data}`} alt={img.name} />
+                    <div className='theia-ChatInput-ImagePreview-Actions'>
+                        <span
+                            className='codicon codicon-close action'
+                            title={nls.localizeByDefault('Remove')}
+                            onClick={e => {
+                                e.stopPropagation();
+                                onRemove(img.id);
+                            }} />
+                    </div>
+                </div>
+            ))}
+        </div>
+ ); +}; diff --git a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx index 00432964aecef..fe7ba09abb617 100644 --- a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx @@ -14,21 +14,22 @@ // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 // ***************************************************************************** import { ChangeSet, ChatAgent, ChatChangeEvent, ChatModel, ChatRequestModel, ChatService, ChatSuggestion } from '@theia/ai-chat'; +import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service'; +import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core'; +import { FrontendVariableService } from '@theia/ai-core/lib/browser'; import { Disposable, DisposableCollection, InMemoryResources, URI, nls } from '@theia/core'; import { ContextMenuRenderer, LabelProvider, Message, OpenerService, ReactWidget } from '@theia/core/lib/browser'; import { Deferred } from '@theia/core/lib/common/promise-util'; import { inject, injectable, optional, postConstruct } from '@theia/core/shared/inversify'; import * as React from '@theia/core/shared/react'; import { IMouseEvent } from '@theia/monaco-editor-core'; -import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor'; import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider'; -import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution'; -import { AIVariableResolutionRequest } from '@theia/ai-core'; -import { FrontendVariableService } from '@theia/ai-core/lib/browser'; -import { ContextVariablePicker } from './context-variable-picker'; +import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor'; +import { ImagePreview, PastedImage } from './ImagePreview'; import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service'; -import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service'; import { ChatInputAgentSuggestions } from './chat-input-agent-suggestions'; +import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution'; +import { ContextVariablePicker } from './context-variable-picker'; type Query = (query: string) => Promise; type Unpin = () => void; @@ -37,6 +38,12 @@ type DeleteChangeSet = (requestModel: ChatRequestModel) => void; type DeleteChangeSetElement = (requestModel: ChatRequestModel, index: number) => void; type OpenContextElement = (request: AIVariableResolutionRequest) => unknown; +// Interface for the payload submitted to the AI +// interface ChatPayload { +// text: string; +// images?: PastedImage[]; +// } + export const AIChatInputConfiguration = Symbol('AIChatInputConfiguration'); export interface AIChatInputConfiguration { showContext?: boolean; @@ -132,6 +139,12 @@ export class AIChatInputWidget extends ReactWidget { this.update(); } + // State for pasted images + private _pastedImages: PastedImage[] = []; + public get pastedImages(): PastedImage[] { + return this._pastedImages; + } + @postConstruct() protected init(): void { this.id = AIChatInputWidget.ID; @@ -139,6 +152,41 @@ export class AIChatInputWidget extends ReactWidget { this.update(); } + // Process a file blob into an image + private processImageFromClipboard(blob: File): void { + const reader = new FileReader(); + reader.onload = e => { + if 
(!e.target?.result) { return; } + + const imageId = `img-${Date.now()}`; + const dataUrl = e.target.result as string; + + // Extract the base64 data by removing the data URL prefix + // Format is like: data:image/png;base64,BASE64DATA + const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1); + + // Add image to state + const newImage: PastedImage = { + id: imageId, + data: imageData, // Store just the base64 data without the prefix + name: blob.name || `pasted-image-${Date.now()}.png`, + type: blob.type as PastedImage['type'] + }; + + this._pastedImages = [...this._pastedImages, newImage]; + + this.update(); + }; + + reader.readAsDataURL(blob); + } + + // Remove an image by id + public removeImage(id: string): void { + this._pastedImages = this._pastedImages.filter(img => img.id !== id); + this.update(); + } + protected override onActivateRequest(msg: Message): void { super.onActivateRequest(msg); this.editorReady.promise.then(() => { @@ -185,6 +233,9 @@ export class AIChatInputWidget extends ReactWidget { decoratorService={this.changeSetDecoratorService} initialValue={this._initialValue} openerService={this.openerService} + pastedImages={this._pastedImages} + onRemoveImage={this.removeImage.bind(this)} + onImagePasted={this.processImageFromClipboard.bind(this)} suggestions={this._chatModel.suggestions} /> ); @@ -268,7 +319,7 @@ export class AIChatInputWidget extends ReactWidget { interface ChatInputProperties { onCancel: (requestModel: ChatRequestModel) => void; - onQuery: (query: string) => void; + onQuery: (query?: string, images?: LLMImageData[]) => void; onUnpin: () => void; onDragOver: (event: React.DragEvent) => void; onDrop: (event: React.DragEvent) => void; @@ -294,6 +345,9 @@ interface ChatInputProperties { decoratorService: ChangeSetDecoratorService; initialValue?: string; openerService: OpenerService; + pastedImages: PastedImage[]; + onRemoveImage: (id: string) => void; + onImagePasted: (blob: File) => void; suggestions: readonly ChatSuggestion[] } @@ -321,6 +375,38 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu // eslint-disable-next-line no-null/no-null const placeholderRef = React.useRef(null); const editorRef = React.useRef(undefined); + // eslint-disable-next-line no-null/no-null + const containerRef = React.useRef(null); + + // Handle paste events on the container + const handlePaste = React.useCallback((e: ClipboardEvent) => { + if (!e.clipboardData?.items) { return; } + + for (const item of e.clipboardData.items) { + if (item.type.startsWith('image/')) { + const blob = item.getAsFile(); + if (blob) { + e.preventDefault(); + e.stopPropagation(); + props.onImagePasted(blob); + break; + } + } + } + }, [props.onImagePasted]); + + // Set up paste handler on the container div + React.useEffect(() => { + const container = containerRef.current; + if (container) { + container.addEventListener('paste', handlePaste, true); + + return () => { + container.removeEventListener('paste', handlePaste, true); + }; + } + return undefined; + }, [handlePaste]); React.useEffect(() => { const uri = props.resourceUriProvider(); @@ -451,7 +537,7 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu responseListenerRef.current?.dispose(); responseListenerRef.current = undefined; }; - }, [props.chatModel]); + }, [props.chatModel, props.actionService, props.labelProvider]); React.useEffect(() => { const disposable = props.actionService.onDidChange(() => { @@ -460,7 +546,14 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu setChangeSetUI(current => 
!current ? current : { ...current, actions: newActions }); }); return () => disposable.dispose(); - }); + }, [props.actionService, props.chatModel.changeSet]); + + // // Extract image references from text + // const extractImageReferences = (text: string): string[] => { + // const regex = /!\[.*?\]\((img-\d+)\)/g; + // const matches = [...text.matchAll(regex)]; + // return matches.map(match => match[1]); + // }; React.useEffect(() => { const disposable = props.decoratorService.onDidChangeDecorations(() => { @@ -486,13 +579,19 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu }, [editorRef]); const submit = React.useCallback(function submit(value: string): void { - if (!value || value.trim().length === 0) { + if ((!value || value.trim().length === 0) && props.pastedImages.length === 0) { return; } + setInProgress(true); - props.onQuery(value); + props.onQuery(value, props.pastedImages.map(p => ({ imageData: p.data, mediaType: p.type }))); setValue(''); - }, [props.context, props.onQuery, setValue]); + + if (editorRef.current) { + editorRef.current.document.textEditorModel.setValue(''); + }// Clear pasted images after submission + props.pastedImages.forEach(image => props.onRemoveImage(image.id)); + }, [props.context, props.onQuery, setValue, props.pastedImages]); const onKeyDown = React.useCallback((event: React.KeyboardEvent) => { if (!props.isEnabled) { @@ -592,21 +691,31 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement, props.onOpenContextElement); - return
- {} - {props.showChangeSet && changeSetUI?.elements && - - } -
-
-
{nls.localizeByDefault('Ask a question')}
-
- {props.context && props.context.length > 0 && - + return ( +
+ {} + {props.showChangeSet && changeSetUI?.elements && + } - +
+
+
{nls.localizeByDefault('Ask a question')}
+
+ {props.pastedImages.length > 0 && + + } + {props.context && props.context.length > 0 && + + } + +
-
; + ); }; const noPropagation = (handler: () => void) => (e: React.MouseEvent) => { diff --git a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx index 93e6c03eda83d..4f03d639fd3d5 100644 --- a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx @@ -24,14 +24,13 @@ import { EditableChatRequestModel, ParsedChatRequestAgentPart, ParsedChatRequestVariablePart, - type ChatRequest, type ChatHierarchyBranch, + type ChatRequest, } from '@theia/ai-chat'; -import { AIVariableService } from '@theia/ai-core'; +import { AIVariableService, LLMImageData } from '@theia/ai-core'; import { AIActivationService } from '@theia/ai-core/lib/browser'; import { CommandRegistry, ContributionProvider, Disposable, DisposableCollection, Emitter } from '@theia/core'; import { - codicon, CompositeTreeNode, ContextMenuRenderer, HoverService, @@ -44,6 +43,7 @@ import { TreeProps, TreeWidget, Widget, + codicon, type ReactWidget } from '@theia/core/lib/browser'; import { nls } from '@theia/core/lib/common/nls'; @@ -56,9 +56,9 @@ import { } from '@theia/core/shared/inversify'; import * as React from '@theia/core/shared/react'; import { ChatNodeToolbarActionContribution } from '../chat-node-toolbar-action-contribution'; +import { ProgressMessage } from '../chat-progress-message'; import { ChatResponsePartRenderer } from '../chat-response-part-renderer'; import { useMarkdownRendering } from '../chat-response-renderer/markdown-part-renderer'; -import { ProgressMessage } from '../chat-progress-message'; import { AIChatTreeInputFactory, type AIChatTreeInputWidget } from './chat-view-tree-input-widget'; // TODO Instead of directly operating on the ChatRequestModel we could use an intermediate view model @@ -550,6 +550,8 @@ const ChatRequestRender = ( provideChatInputWidget: () => ReactWidget | undefined, }) => { const parts = node.request.message.parts; + const images = node.request.images || []; + if (EditableChatRequestModel.isEditing(node.request)) { const widget = provideChatInputWidget(); if (widget) { @@ -624,6 +626,20 @@ const ChatRequestRender = ( } })}

+                {images.length > 0 && (
+                    <div className='theia-RequestNode-Images'>
+                        {images.map((img, index) => (
+                            <div key={index} className='theia-RequestNode-ImageContainer'>
+                                {LLMImageData.isBase64ImageData(img) ?
+                                    <img className='theia-RequestNode-Image' src={`data:${img.mediaType};base64,${img.imageData}`} alt={`Image ${index + 1}`} /> : undefined}
+                            </div>
+                        ))}
+                    </div>
+ )} {renderFooter()}
); diff --git a/packages/ai-chat-ui/src/browser/chat-view-contribution.ts b/packages/ai-chat-ui/src/browser/chat-view-contribution.ts index a5cad528fb951..101f4de642be4 100644 --- a/packages/ai-chat-ui/src/browser/chat-view-contribution.ts +++ b/packages/ai-chat-ui/src/browser/chat-view-contribution.ts @@ -117,7 +117,7 @@ export class ChatViewMenuContribution implements MenuContribution, CommandContri protected getCopyText(arg: RequestNode | ResponseNode): string { if (isRequestNode(arg)) { - return arg.request.request.text; + return arg.request.request.text ?? ''; } else if (isResponseNode(arg)) { return arg.response.response.asDisplayString(); } diff --git a/packages/ai-chat-ui/src/browser/chat-view-widget.tsx b/packages/ai-chat-ui/src/browser/chat-view-widget.tsx index 42c6603143397..22b68ef3b00d1 100644 --- a/packages/ai-chat-ui/src/browser/chat-view-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-view-widget.tsx @@ -21,7 +21,7 @@ import { inject, injectable, postConstruct } from '@theia/core/shared/inversify' import { AIChatInputWidget } from './chat-input-widget'; import { ChatViewTreeWidget } from './chat-tree-view/chat-view-tree-widget'; import { AIActivationService } from '@theia/ai-core/lib/browser/ai-activation-service'; -import { AIVariableResolutionRequest } from '@theia/ai-core'; +import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core'; import { ProgressBarFactory } from '@theia/core/lib/browser/progress-bar-factory'; import { FrontendVariableService } from '@theia/ai-core/lib/browser'; @@ -177,10 +177,21 @@ export class ChatViewWidget extends BaseWidget implements ExtractableWidget, Sta return this.onStateChangedEmitter.event; } - protected async onQuery(query: string | ChatRequest): Promise { - const chatRequest: ChatRequest = typeof query === 'string' ? { text: query } : { ...query }; - if (chatRequest.text.length === 0) { return; } + private isEmptyQuery(query?: string | ChatRequest): boolean { + if (query === undefined) { + return true; + } + if (typeof query === 'string') { + return query.length === 0; + } + return (query.text === undefined || query.text?.length === 0) && + (query.images === undefined || query.images?.length === 0); + } + + protected async onQuery(query?: string | ChatRequest, imageData?: LLMImageData[]): Promise { + if (this.isEmptyQuery(query) && (!imageData || imageData.length === 0)) { return; } + const chatRequest: ChatRequest = typeof query === 'string' ? 
{ text: query, images: imageData } : { ...query }; const requestProgress = await this.chatService.sendRequest(this.chatSession.id, chatRequest); requestProgress?.responseCompleted.then(responseModel => { if (responseModel.isError) { diff --git a/packages/ai-chat-ui/src/browser/style/index.css b/packages/ai-chat-ui/src/browser/style/index.css index 567bb09c07b50..3b5113524d60d 100644 --- a/packages/ai-chat-ui/src/browser/style/index.css +++ b/packages/ai-chat-ui/src/browser/style/index.css @@ -155,6 +155,12 @@ div:last-child > .theia-ChatNode { justify-content: flex-end; gap: 2px; } +.theia-RequestNode-Images { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin-top: 8px; +} .theia-RequestNode-Footer .item { opacity: var(--theia-mod-disabled-opacity); @@ -169,6 +175,19 @@ div:last-child > .theia-ChatNode { cursor: pointer; opacity: 1; } +.theia-RequestNode-ImageContainer { + border: var(--theia-border-width) solid var(--theia-dropdown-border); + border-radius: 4px; + overflow: hidden; + height: 120px; + width: 140px; +} + +.theia-RequestNode-Image { + width: 100%; + height: 100%; + object-fit: contain; +} .theia-RequestNode-Footer :not(.item.enabled) .action-label { background: transparent; @@ -498,6 +517,57 @@ div:last-child > .theia-ChatNode { padding-left: 8px !important; } +/* Image Preview Styles */ +.theia-ChatInput-ImagePreview { + display: flex; + flex-wrap: wrap; + gap: 8px; + padding: 8px; + border-bottom: var(--theia-border-width) solid var(--theia-dropdown-border); +} + +.theia-ChatInput-ImagePreview-Item { + position: relative; + border: var(--theia-border-width) solid var(--theia-dropdown-border); + border-radius: 4px; + overflow: hidden; + height: 100px; + width: 120px; +} + +.theia-ChatInput-ImagePreview-Item img { + width: 100%; + height: 100%; + object-fit: contain; +} + +.theia-ChatInput-ImagePreview-Actions { + position: absolute; + top: 2px; + right: 2px; + background-color: rgba(0, 0, 0, 0.5); + border-radius: 4px; + display: flex; + align-items: center; + justify-content: center; + z-index: 10; +} + +.theia-ChatInput-ImagePreview-Actions .action { + width: 20px; + height: 20px; + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + color: white; +} + +.theia-ChatInput-ImagePreview-Actions .action:hover { + background-color: rgba(255, 255, 255, 0.2); + border-radius: 4px; +} + .theia-ChatInputOptions { width: 100%; height: 25px; diff --git a/packages/ai-chat/src/common/chat-agents.ts b/packages/ai-chat/src/common/chat-agents.ts index 344b38c5495bf..fe1710fe2036b 100644 --- a/packages/ai-chat/src/common/chat-agents.ts +++ b/packages/ai-chat/src/common/chat-agents.ts @@ -54,17 +54,17 @@ import { inject, injectable, named, postConstruct } from '@theia/core/shared/inv import { ChatAgentService } from './chat-agent-service'; import { ChatModel, - MutableChatRequestModel, + ChatRequestModel, ChatResponseContent, ErrorChatResponseContentImpl, MarkdownChatResponseContentImpl, - ToolCallChatResponseContentImpl, - ChatRequestModel, + MutableChatRequestModel, ThinkingChatResponseContentImpl, + ToolCallChatResponseContentImpl, } from './chat-model'; +import { ChatToolRequest, ChatToolRequestService } from './chat-tool-request-service'; import { parseContents } from './parse-contents'; import { DefaultResponseContentFactory, ResponseContentMatcher, ResponseContentMatcherProvider } from './response-content-matcher'; -import { ChatToolRequest, ChatToolRequestService } from './chat-tool-request-service'; /** * System message content, enriched with 
function descriptions. @@ -255,10 +255,15 @@ export abstract class AbstractChatAgent implements ChatAgent { const requestMessages = model.getRequests().flatMap(request => { const messages: LanguageModelMessage[] = []; const text = request.message.parts.map(part => part.promptText).join(''); - messages.push({ - actor: 'user', - type: 'text', - text: text, + if (text.length !== 0) { + messages.push({ + actor: 'user', + type: 'text', + text: text, + }); + } + request.images?.forEach(image => { + messages.push({ actor: 'user', type: 'image', image }); }); if (request.response.isComplete || includeResponseInProgress) { const responseMessages: LanguageModelMessage[] = request.response.response.content.flatMap(c => { diff --git a/packages/ai-chat/src/common/chat-model.ts b/packages/ai-chat/src/common/chat-model.ts index 299e75860c4fb..37dc1e2cd4d1b 100644 --- a/packages/ai-chat/src/common/chat-model.ts +++ b/packages/ai-chat/src/common/chat-model.ts @@ -19,7 +19,16 @@ *--------------------------------------------------------------------------------------------*/ // Partially copied from https://github.com/microsoft/vscode/blob/a2cab7255c0df424027be05d58e1b7b941f4ea60/src/vs/workbench/contrib/chat/common/chatModel.ts -import { AIVariableResolutionRequest, LanguageModelMessage, ResolvedAIContextVariable, TextMessage, ThinkingMessage, ToolResultMessage, ToolUseMessage } from '@theia/ai-core'; +import { + AIVariableResolutionRequest, + LanguageModelMessage, + LLMImageData, + ResolvedAIContextVariable, + TextMessage, + ThinkingMessage, + ToolResultMessage, + ToolUseMessage +} from '@theia/ai-core'; import { CancellationToken, CancellationTokenSource, Command, Disposable, DisposableCollection, Emitter, Event, generateUuid, URI } from '@theia/core'; import { MarkdownString, MarkdownStringImpl } from '@theia/core/lib/common/markdown-rendering'; import { Position } from '@theia/core/shared/vscode-languageserver-protocol'; @@ -259,8 +268,9 @@ export interface ChangeSetDecoration { } export interface ChatRequest { - readonly text: string; + readonly text?: string; readonly displayText?: string; + readonly images?: LLMImageData[]; /** * If the request has been triggered in the context of * an existing request, this id will be set to the id of the @@ -280,6 +290,7 @@ export interface ChatRequestModel { readonly request: ChatRequest; readonly response: ChatResponseModel; readonly message: ParsedChatRequest; + readonly images?: LLMImageData[]; readonly context: ChatContext; readonly agentId?: string; readonly data?: { [key: string]: unknown }; @@ -1147,6 +1158,7 @@ export class MutableChatRequestModel implements ChatRequestModel, EditableChatRe protected _context: ChatContext; protected _agentId?: string; protected _data: { [key: string]: unknown }; + public images?: LLMImageData[]; protected _isEditing = false; protected readonly toDispose = new DisposableCollection(); diff --git a/packages/ai-chat/src/common/chat-request-parser.ts b/packages/ai-chat/src/common/chat-request-parser.ts index d6a4c70c4061d..db1f1ed314413 100644 --- a/packages/ai-chat/src/common/chat-request-parser.ts +++ b/packages/ai-chat/src/common/chat-request-parser.ts @@ -104,71 +104,73 @@ export class ChatRequestParserImpl implements ChatRequestParser { const parts: ParsedChatRequestPart[] = []; const variables = new Map(); const toolRequests = new Map(); - const message = request.text; - for (let i = 0; i < message.length; i++) { - const previousChar = message.charAt(i - 1); - const char = message.charAt(i); - let newPart: 
ParsedChatRequestPart | undefined; + if (request.text) { + const message = request.text; + for (let i = 0; i < message.length; i++) { + const previousChar = message.charAt(i - 1); + const char = message.charAt(i); + let newPart: ParsedChatRequestPart | undefined; - if (previousChar.match(/\s/) || i === 0) { - if (char === chatFunctionLeader) { - const functionPart = this.tryToParseFunction( - message.slice(i), - i - ); - newPart = functionPart; - if (functionPart) { - toolRequests.set(functionPart.toolRequest.id, functionPart.toolRequest); - } - } else if (char === chatVariableLeader) { - const variablePart = this.tryToParseVariable( - message.slice(i), - i, - parts - ); - newPart = variablePart; - if (variablePart) { - const variable = this.variableService.getVariable(variablePart.variableName); - if (variable) { - variables.set(variable.name, variable); + if (previousChar.match(/\s/) || i === 0) { + if (char === chatFunctionLeader) { + const functionPart = this.tryToParseFunction( + message.slice(i), + i + ); + newPart = functionPart; + if (functionPart) { + toolRequests.set(functionPart.toolRequest.id, functionPart.toolRequest); + } + } else if (char === chatVariableLeader) { + const variablePart = this.tryToParseVariable( + message.slice(i), + i, + parts + ); + newPart = variablePart; + if (variablePart) { + const variable = this.variableService.getVariable(variablePart.variableName); + if (variable) { + variables.set(variable.name, variable); + } } + } else if (char === chatAgentLeader) { + newPart = this.tryToParseAgent( + message.slice(i), + i, + parts, + location + ); } - } else if (char === chatAgentLeader) { - newPart = this.tryToParseAgent( - message.slice(i), - i, - parts, - location - ); } - } - if (newPart) { - if (i !== 0) { - // Insert a part for all the text we passed over, then insert the new parsed part - const previousPart = parts.at(-1); - const previousPartEnd = previousPart?.range.endExclusive ?? 0; - parts.push( - new ParsedChatRequestTextPart( - offsetRange(previousPartEnd, i), - message.slice(previousPartEnd, i) - ) - ); - } + if (newPart) { + if (i !== 0) { + // Insert a part for all the text we passed over, then insert the new parsed part + const previousPart = parts.at(-1); + const previousPartEnd = previousPart?.range.endExclusive ?? 0; + parts.push( + new ParsedChatRequestTextPart( + offsetRange(previousPartEnd, i), + message.slice(previousPartEnd, i) + ) + ); + } - parts.push(newPart); + parts.push(newPart); + } } - } - const lastPart = parts.at(-1); - const lastPartEnd = lastPart?.range.endExclusive ?? 0; - if (lastPartEnd < message.length) { - parts.push( - new ParsedChatRequestTextPart( - offsetRange(lastPartEnd, message.length), - message.slice(lastPartEnd, message.length) - ) - ); + const lastPart = parts.at(-1); + const lastPartEnd = lastPart?.range.endExclusive ?? 
0; + if (lastPartEnd < message.length) { + parts.push( + new ParsedChatRequestTextPart( + offsetRange(lastPartEnd, message.length), + message.slice(lastPartEnd, message.length) + ) + ); + } } return { parts, toolRequests, variables }; } diff --git a/packages/ai-chat/src/common/chat-service.ts b/packages/ai-chat/src/common/chat-service.ts index 85dc3ecc1d1f1..00ac86f89d2af 100644 --- a/packages/ai-chat/src/common/chat-service.ts +++ b/packages/ai-chat/src/common/chat-service.ts @@ -249,6 +249,7 @@ export class ChatServiceImpl implements ChatService { } const requestModel = session.model.addRequest(parsedRequest, agent?.id, resolvedContext); + requestModel.images = request.images; this.updateSessionMetadata(session, requestModel); resolutionContext.request = requestModel; diff --git a/packages/ai-core/src/common/language-model.ts b/packages/ai-core/src/common/language-model.ts index f9ebd4f9e8c9d..47a0e70eb0e6c 100644 --- a/packages/ai-core/src/common/language-model.ts +++ b/packages/ai-core/src/common/language-model.ts @@ -19,7 +19,7 @@ import { inject, injectable, named, postConstruct } from '@theia/core/shared/inv export type MessageActor = 'user' | 'ai' | 'system'; -export type LanguageModelMessage = TextMessage | ThinkingMessage | ToolUseMessage | ToolResultMessage; +export type LanguageModelMessage = TextMessage | ThinkingMessage | ToolUseMessage | ToolResultMessage | ImageMessage; export namespace LanguageModelMessage { export function isTextMessage(obj: LanguageModelMessage): obj is TextMessage { @@ -34,6 +34,9 @@ export namespace LanguageModelMessage { export function isToolResultMessage(obj: LanguageModelMessage): obj is ToolResultMessage { return obj.type === 'tool_result'; } + export function isImageMessage(obj: LanguageModelMessage): obj is ImageMessage { + return obj.type === 'image'; + } } export interface TextMessage { actor: MessageActor; @@ -63,6 +66,23 @@ export interface ToolUseMessage { input: unknown; name: string; } +export interface UrlImageData { url: string }; +export interface Base64ImageData { + // base64 encoded image data + imageData: string; + // the media type + mediaType: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; +}; +export type LLMImageData = UrlImageData | Base64ImageData; +export namespace LLMImageData { + export const isUrlImage = (obj: LLMImageData): obj is UrlImageData => 'url' in obj; + export const isBase64ImageData = (obj: LLMImageData): obj is Base64ImageData => 'imageData' in obj; +} +export interface ImageMessage { + actor: 'ai' | 'user'; + type: 'image'; + image: LLMImageData; +} export const isLanguageModelRequestMessage = (obj: unknown): obj is LanguageModelMessage => !!(obj && typeof obj === 'object' && diff --git a/packages/ai-google/src/node/google-language-model.ts b/packages/ai-google/src/node/google-language-model.ts index 5dae76f8d9041..d548b7b9d427d 100644 --- a/packages/ai-google/src/node/google-language-model.ts +++ b/packages/ai-google/src/node/google-language-model.ts @@ -23,7 +23,8 @@ import { LanguageModelStreamResponsePart, LanguageModelTextResponse, TokenUsageService, - UserRequest + UserRequest, + LLMImageData } from '@theia/ai-core'; import { CancellationToken } from '@theia/core'; import { GoogleGenAI, FunctionCallingConfigMode, FunctionDeclaration, Content, Schema, Part, Modality } from '@google/genai'; @@ -48,6 +49,8 @@ const convertMessageToPart = (message: LanguageModelMessage): Part[] | undefined } else if (LanguageModelMessage.isThinkingMessage(message)) { return [{ thought: true }, { text: 
message.thinking }]; + } else if (LanguageModelMessage.isImageMessage(message) && LLMImageData.isBase64ImageData(message.image)) { + return [{ inlineData: { data: message.image.imageData, mimeType: message.image.mediaType } }]; } }; /** diff --git a/packages/ai-openai/src/node/openai-language-model.ts b/packages/ai-openai/src/node/openai-language-model.ts index 84ae30d14dd56..c439d9f81be32 100644 --- a/packages/ai-openai/src/node/openai-language-model.ts +++ b/packages/ai-openai/src/node/openai-language-model.ts @@ -23,7 +23,8 @@ import { LanguageModelTextResponse, TextMessage, TokenUsageService, - UserRequest + UserRequest, + LLMImageData } from '@theia/ai-core'; import { CancellationToken } from '@theia/core'; import { injectable } from '@theia/core/shared/inversify'; @@ -284,6 +285,20 @@ export class OpenAiModelUtils { content: '' }; } + if (LanguageModelMessage.isImageMessage(message) && message.actor === 'user') { + return { + role: 'user', + content: [{ + type: 'image_url', + image_url: { + url: + LLMImageData.isBase64ImageData(message.image) ? + `data:${message.image.mediaType};base64,${message.image.imageData}` : + message.image.url + } + }] + }; + } throw new Error(`Unknown message type:'${JSON.stringify(message)}'`); } From 99665b80721289844d76726b604c47f61be0c48f Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Tue, 3 Jun 2025 17:45:39 +0200 Subject: [PATCH 2/5] Move images from request to context --- .../src/browser/chat-tree-view/chat-view-tree-widget.tsx | 2 +- packages/ai-chat/src/common/chat-agents.ts | 2 +- packages/ai-chat/src/common/chat-model.ts | 6 +++--- packages/ai-chat/src/common/chat-service.ts | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx index 4f03d639fd3d5..546638f1b90ce 100644 --- a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx @@ -550,7 +550,7 @@ const ChatRequestRender = ( provideChatInputWidget: () => ReactWidget | undefined, }) => { const parts = node.request.message.parts; - const images = node.request.images || []; + const images = node.request.context.images || []; if (EditableChatRequestModel.isEditing(node.request)) { const widget = provideChatInputWidget(); diff --git a/packages/ai-chat/src/common/chat-agents.ts b/packages/ai-chat/src/common/chat-agents.ts index 7d13bd35de038..9afecda3a8f8f 100644 --- a/packages/ai-chat/src/common/chat-agents.ts +++ b/packages/ai-chat/src/common/chat-agents.ts @@ -259,7 +259,7 @@ export abstract class AbstractChatAgent implements ChatAgent { text: text, }); } - request.images?.forEach(image => { + request.context.images?.forEach(image => { messages.push({ actor: 'user', type: 'image', image }); }); if (request.response.isComplete || includeResponseInProgress) { diff --git a/packages/ai-chat/src/common/chat-model.ts b/packages/ai-chat/src/common/chat-model.ts index 26d297caccd13..70c7c4dd0f0bc 100644 --- a/packages/ai-chat/src/common/chat-model.ts +++ b/packages/ai-chat/src/common/chat-model.ts @@ -274,9 +274,8 @@ export interface ChangeSetDecoration { } export interface ChatRequest { - readonly text?: string; + readonly text: string; readonly displayText?: string; - readonly images?: LLMImageData[]; /** * If the request has been triggered in the context of * an existing request, this id will be set to the id of the @@ -284,10 +283,12 @@ 
export interface ChatRequest { */ readonly referencedRequestId?: string; readonly variables?: readonly AIVariableResolutionRequest[]; + readonly images?: LLMImageData[]; } export interface ChatContext { variables: ResolvedAIContextVariable[]; + images?: LLMImageData[]; } export interface ChatRequestModel { @@ -296,7 +297,6 @@ export interface ChatRequestModel { readonly request: ChatRequest; readonly response: ChatResponseModel; readonly message: ParsedChatRequest; - readonly images?: LLMImageData[]; readonly context: ChatContext; readonly agentId?: string; readonly data?: { [key: string]: unknown }; diff --git a/packages/ai-chat/src/common/chat-service.ts b/packages/ai-chat/src/common/chat-service.ts index 00ac86f89d2af..c10f9f93d6c5f 100644 --- a/packages/ai-chat/src/common/chat-service.ts +++ b/packages/ai-chat/src/common/chat-service.ts @@ -19,7 +19,7 @@ *--------------------------------------------------------------------------------------------*/ // Partially copied from https://github.com/microsoft/vscode/blob/a2cab7255c0df424027be05d58e1b7b941f4ea60/src/vs/workbench/contrib/chat/common/chatService.ts -import { AIVariableResolutionRequest, AIVariableService, ResolvedAIContextVariable } from '@theia/ai-core'; +import { AIVariableResolutionRequest, AIVariableService, LLMImageData, ResolvedAIContextVariable } from '@theia/ai-core'; import { Emitter, ILogger, generateUuid } from '@theia/core'; import { inject, injectable, optional } from '@theia/core/shared/inversify'; import { Event } from '@theia/core/shared/vscode-languageserver-protocol'; @@ -131,7 +131,7 @@ export interface ChatService { sendRequest( sessionId: string, - request: ChatRequest + request: ChatRequest, ): Promise; deleteChangeSet(sessionId: string): void; @@ -234,6 +234,7 @@ export class ChatServiceImpl implements ChatService { const resolutionContext: ChatSessionContext = { model: session.model }; const resolvedContext = await this.resolveChatContext(request.variables ?? 
session.model.context.getVariables(), resolutionContext); + resolvedContext.images = request.images; const parsedRequest = await this.chatRequestParser.parseChatRequest(request, session.model.location, resolvedContext); const agent = this.getAgent(parsedRequest, session); @@ -249,7 +250,6 @@ export class ChatServiceImpl implements ChatService { } const requestModel = session.model.addRequest(parsedRequest, agent?.id, resolvedContext); - requestModel.images = request.images; this.updateSessionMetadata(session, requestModel); resolutionContext.request = requestModel; From 7d34bd4272d45b0eefc5736a281365dd8f3500b6 Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Wed, 4 Jun 2025 12:04:02 +0200 Subject: [PATCH 3/5] Cleanup --- .../ai-chat-ui/src/browser/chat-input-widget.tsx | 13 ------------- packages/ai-chat/src/common/chat-model.ts | 1 - packages/ai-chat/src/common/chat-service.ts | 2 +- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx index fc590c2dd285d..9e0676ffe41fb 100644 --- a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx @@ -38,12 +38,6 @@ type DeleteChangeSet = (requestModel: ChatRequestModel) => void; type DeleteChangeSetElement = (requestModel: ChatRequestModel, index: number) => void; type OpenContextElement = (request: AIVariableResolutionRequest) => unknown; -// Interface for the payload submitted to the AI -// interface ChatPayload { -// text: string; -// images?: PastedImage[]; -// } - export const AIChatInputConfiguration = Symbol('AIChatInputConfiguration'); export interface AIChatInputConfiguration { showContext?: boolean; @@ -539,13 +533,6 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu return () => disposable.dispose(); }, [props.actionService, props.chatModel.changeSet]); - // // Extract image references from text - // const extractImageReferences = (text: string): string[] => { - // const regex = /!\[.*?\]\((img-\d+)\)/g; - // const matches = [...text.matchAll(regex)]; - // return matches.map(match => match[1]); - // }; - React.useEffect(() => { const disposable = props.decoratorService.onDidChangeDecorations(() => { setChangeSetUI(buildChangeSetUI( diff --git a/packages/ai-chat/src/common/chat-model.ts b/packages/ai-chat/src/common/chat-model.ts index 169c7746b056b..7eb00765ca5b5 100644 --- a/packages/ai-chat/src/common/chat-model.ts +++ b/packages/ai-chat/src/common/chat-model.ts @@ -1153,7 +1153,6 @@ export class MutableChatRequestModel implements ChatRequestModel, EditableChatRe protected _context: ChatContext; protected _agentId?: string; protected _data: { [key: string]: unknown }; - public images?: LLMImageData[]; protected _isEditing = false; protected readonly toDispose = new DisposableCollection(); diff --git a/packages/ai-chat/src/common/chat-service.ts b/packages/ai-chat/src/common/chat-service.ts index 2fdb80ec60c35..2c47eb7f78d83 100644 --- a/packages/ai-chat/src/common/chat-service.ts +++ b/packages/ai-chat/src/common/chat-service.ts @@ -131,7 +131,7 @@ export interface ChatService { sendRequest( sessionId: string, - request: ChatRequest, + request: ChatRequest ): Promise; deleteChangeSet(sessionId: string): void; From 13b4250b78865aec89ebf9cfdd01654c89d1f87d Mon Sep 17 00:00:00 2001 From: Martin Fleck Date: Wed, 4 Jun 2025 11:58:54 +0200 Subject: [PATCH 4/5] Properly use image context variables --- .../src/node/anthropic-language-model.ts | 26 ++- 
.../ai-chat-ui/src/browser/ImagePreview.tsx | 53 ------ .../src/browser/chat-input-widget.tsx | 171 ++++++++---------- .../chat-tree-view/chat-view-tree-widget.tsx | 24 +-- .../src/browser/chat-view-widget.tsx | 19 +- .../ai-chat-ui/src/browser/style/index.css | 91 +++------- .../src/browser/ai-chat-frontend-module.ts | 5 + .../file-chat-variable-contribution.ts | 122 ++++++++++++- .../image-context-variable-contribution.ts | 153 ++++++++++++++++ packages/ai-chat/src/common/chat-agents.ts | 14 +- packages/ai-chat/src/common/chat-model.ts | 3 - .../ai-chat/src/common/chat-request-parser.ts | 119 ++++++------ packages/ai-chat/src/common/chat-service.ts | 1 - .../src/common/image-context-variable.ts | 116 ++++++++++++ .../src/browser/frontend-variable-service.ts | 36 ++++ packages/ai-core/src/common/language-model.ts | 21 +-- .../ai-core/src/common/variable-service.ts | 12 +- .../src/node/google-language-model.ts | 6 +- .../src/node/openai-language-model.ts | 6 +- 19 files changed, 660 insertions(+), 338 deletions(-) delete mode 100644 packages/ai-chat-ui/src/browser/ImagePreview.tsx create mode 100644 packages/ai-chat/src/browser/image-context-variable-contribution.ts create mode 100644 packages/ai-chat/src/common/image-context-variable.ts diff --git a/packages/ai-anthropic/src/node/anthropic-language-model.ts b/packages/ai-anthropic/src/node/anthropic-language-model.ts index 017d02aac2f62..b7dc1ce62b993 100644 --- a/packages/ai-anthropic/src/node/anthropic-language-model.ts +++ b/packages/ai-anthropic/src/node/anthropic-language-model.ts @@ -25,11 +25,12 @@ import { TokenUsageService, TokenUsageParams, UserRequest, - LLMImageData + ImageContent, + ImageMimeType } from '@theia/ai-core'; import { CancellationToken, isArray } from '@theia/core'; import { Anthropic } from '@anthropic-ai/sdk'; -import { Message, MessageParam } from '@anthropic-ai/sdk/resources'; +import { Message, MessageParam, Base64ImageSource } from '@anthropic-ai/sdk/resources'; export const DEFAULT_MAX_TOKENS = 4096; @@ -50,13 +51,30 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont } else if (LanguageModelMessage.isToolResultMessage(message)) { return [{ type: 'tool_result', tool_use_id: message.tool_use_id }]; } else if (LanguageModelMessage.isImageMessage(message)) { - if (LLMImageData.isBase64ImageData(message.image)) { - return [{ type: 'image', source: { type: 'base64', media_type: message.image.mediaType, data: message.image.imageData } }]; + if (ImageContent.isBase64(message.image)) { + return [{ type: 'image', source: { type: 'base64', media_type: mimeTypeToMediaType(message.image.mimeType), data: message.image.base64data } }]; + } else { + return [{ type: 'image', source: { type: 'url', url: message.image.url } }]; } } throw new Error(`Unknown message type:'${JSON.stringify(message)}'`); }; +function mimeTypeToMediaType(mimeType: ImageMimeType): Base64ImageSource['media_type'] { + switch (mimeType) { + case 'image/gif': + return 'image/gif'; + case 'image/jpeg': + return 'image/jpeg'; + case 'image/png': + return 'image/png'; + case 'image/webp': + return 'image/webp'; + default: + return 'image/jpeg'; + } +} + /** * Transforms Theia language model messages to Anthropic API format * @param messages Array of LanguageModelRequestMessage to transform diff --git a/packages/ai-chat-ui/src/browser/ImagePreview.tsx b/packages/ai-chat-ui/src/browser/ImagePreview.tsx deleted file mode 100644 index 53814e84087e5..0000000000000 --- a/packages/ai-chat-ui/src/browser/ImagePreview.tsx +++ 
/dev/null @@ -1,53 +0,0 @@ -// ***************************************************************************** -// Copyright (C) 2025 EclipseSource GmbH. -// -// This program and the accompanying materials are made available under the -// terms of the Eclipse Public License v. 2.0 which is available at -// http://www.eclipse.org/legal/epl-2.0. -// -// This Source Code may also be made available under the following Secondary -// Licenses when the conditions for such availability set forth in the Eclipse -// Public License v. 2.0 are satisfied: GNU General Public License, version 2 -// with the GNU Classpath Exception which is available at -// https://www.gnu.org/software/classpath/license.html. -// -// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 -// ***************************************************************************** -import { nls } from '@theia/core'; -import * as React from '@theia/core/shared/react'; - -// Interface for pasted image data -export interface PastedImage { - id: string; - data: string; - name: string; - type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; -} - -// Image Preview Component -interface ImagePreviewProps { - images: PastedImage[]; - onRemove: (id: string) => void; -} -export const ImagePreview: React.FC = ({ images, onRemove }) => { - if (images.length === 0) { return undefined; } - - return ( -
-        <div className='theia-ChatInput-ImagePreview'>
-            {images.map(img => (
-                <div key={img.id} className='theia-ChatInput-ImagePreview-Item'>
-                    <img src={`data:${img.type};base64,${img.data}`} alt={img.name} />
-                    <div className='theia-ChatInput-ImagePreview-Actions'>
-                        <span
-                            className='codicon codicon-close action'
-                            title={nls.localizeByDefault('Remove')}
-                            onClick={e => {
-                                e.stopPropagation();
-                                onRemove(img.id);
-                            }} />
-                    </div>
-                </div>
-            ))}
-        </div>
- ); -}; diff --git a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx index 9e0676ffe41fb..787d93f5d413c 100644 --- a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx @@ -15,7 +15,7 @@ // ***************************************************************************** import { ChangeSet, ChangeSetElement, ChatAgent, ChatChangeEvent, ChatModel, ChatRequestModel, ChatService, ChatSuggestion } from '@theia/ai-chat'; import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service'; -import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core'; +import { AIVariableResolutionRequest } from '@theia/ai-core'; import { FrontendVariableService } from '@theia/ai-core/lib/browser'; import { Disposable, DisposableCollection, InMemoryResources, URI, nls } from '@theia/core'; import { ContextMenuRenderer, LabelProvider, Message, OpenerService, ReactWidget } from '@theia/core/lib/browser'; @@ -25,11 +25,11 @@ import * as React from '@theia/core/shared/react'; import { IMouseEvent } from '@theia/monaco-editor-core'; import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider'; import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor'; -import { ImagePreview, PastedImage } from './ImagePreview'; import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service'; import { ChatInputAgentSuggestions } from './chat-input-agent-suggestions'; import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution'; import { ContextVariablePicker } from './context-variable-picker'; +import { ImageContextVariable } from '@theia/ai-chat/lib/common/image-context-variable'; type Query = (query: string) => Promise; type Unpin = () => void; @@ -134,12 +134,6 @@ export class AIChatInputWidget extends ReactWidget { this.update(); } - // State for pasted images - private _pastedImages: PastedImage[] = []; - public get pastedImages(): PastedImage[] { - return this._pastedImages; - } - @postConstruct() protected init(): void { this.id = AIChatInputWidget.ID; @@ -148,41 +142,6 @@ export class AIChatInputWidget extends ReactWidget { this.update(); } - // Process a file blob into an image - private processImageFromClipboard(blob: File): void { - const reader = new FileReader(); - reader.onload = e => { - if (!e.target?.result) { return; } - - const imageId = `img-${Date.now()}`; - const dataUrl = e.target.result as string; - - // Extract the base64 data by removing the data URL prefix - // Format is like: data:image/png;base64,BASE64DATA - const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1); - - // Add image to state - const newImage: PastedImage = { - id: imageId, - data: imageData, // Store just the base64 data without the prefix - name: blob.name || `pasted-image-${Date.now()}.png`, - type: blob.type as PastedImage['type'] - }; - - this._pastedImages = [...this._pastedImages, newImage]; - - this.update(); - }; - - reader.readAsDataURL(blob); - } - - // Remove an image by id - public removeImage(id: string): void { - this._pastedImages = this._pastedImages.filter(img => img.id !== id); - this.update(); - } - protected override onActivateRequest(msg: Message): void { super.onActivateRequest(msg); this.editorReady.promise.then(() => { @@ -204,6 +163,7 @@ export class AIChatInputWidget extends ReactWidget { onCancel={this._onCancel.bind(this)} 
onDragOver={this.onDragOver.bind(this)} onDrop={this.onDrop.bind(this)} + onPaste={this.onPaste.bind(this)} onDeleteChangeSet={this._onDeleteChangeSet.bind(this)} onDeleteChangeSetElement={this._onDeleteChangeSetElement.bind(this)} onAddContextElement={this.addContextElement.bind(this)} @@ -229,9 +189,6 @@ export class AIChatInputWidget extends ReactWidget { decoratorService={this.changeSetDecoratorService} initialValue={this._initialValue} openerService={this.openerService} - pastedImages={this._pastedImages} - onRemoveImage={this.removeImage.bind(this)} - onImagePasted={this.processImageFromClipboard.bind(this)} suggestions={this._chatModel.suggestions} /> ); @@ -271,6 +228,30 @@ export class AIChatInputWidget extends ReactWidget { }); } + protected onPaste(event: ClipboardEvent): void { + event.preventDefault(); + event.stopPropagation(); + + this.variableService.getPasteResult(event, { type: 'ai-chat-input-widget' }).then(result => { + result.variables.forEach(variable => this.addContext(variable)); + + if (result.text) { + const position = this.editorRef?.getControl().getPosition(); + if (position && result.text) { + this.editorRef?.getControl().executeEdits('paste', [{ + range: { + startLineNumber: position.lineNumber, + startColumn: position.column, + endLineNumber: position.lineNumber, + endColumn: position.column + }, + text: result.text + }]); + } + } + }); + } + protected async openContextElement(request: AIVariableResolutionRequest): Promise { const session = this.chatService.getSessions().find(candidate => candidate.model.id === this._chatModel.id); const context = { session }; @@ -315,10 +296,11 @@ export class AIChatInputWidget extends ReactWidget { interface ChatInputProperties { onCancel: (requestModel: ChatRequestModel) => void; - onQuery: (query?: string, images?: LLMImageData[]) => void; + onQuery: (query: string) => void; onUnpin: () => void; onDragOver: (event: React.DragEvent) => void; onDrop: (event: React.DragEvent) => void; + onPaste: (event: ClipboardEvent) => void; onDeleteChangeSet: (sessionId: string) => void; onDeleteChangeSetElement: (sessionId: string, uri: URI) => void; onAddContextElement: () => void; @@ -341,9 +323,6 @@ interface ChatInputProperties { decoratorService: ChangeSetDecoratorService; initialValue?: string; openerService: OpenerService; - pastedImages: PastedImage[]; - onRemoveImage: (id: string) => void; - onImagePasted: (blob: File) => void; suggestions: readonly ChatSuggestion[] } @@ -372,21 +351,9 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu const containerRef = React.useRef(null); // Handle paste events on the container - const handlePaste = React.useCallback((e: ClipboardEvent) => { - if (!e.clipboardData?.items) { return; } - - for (const item of e.clipboardData.items) { - if (item.type.startsWith('image/')) { - const blob = item.getAsFile(); - if (blob) { - e.preventDefault(); - e.stopPropagation(); - props.onImagePasted(blob); - break; - } - } - } - }, [props.onImagePasted]); + const handlePaste = React.useCallback((event: ClipboardEvent) => { + props.onPaste(event); + }, [props.onPaste]); // Set up paste handler on the container div React.useEffect(() => { @@ -554,19 +521,18 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu }, [editorRef]); const submit = React.useCallback(function submit(value: string): void { - if ((!value || value.trim().length === 0) && props.pastedImages.length === 0) { + if (!value || value.trim().length === 0) { return; } setInProgress(true); - props.onQuery(value, 
props.pastedImages.map(p => ({ imageData: p.data, mediaType: p.type }))); + props.onQuery(value); setValue(''); if (editorRef.current) { editorRef.current.document.textEditorModel.setValue(''); - }// Clear pasted images after submission - props.pastedImages.forEach(image => props.onRemoveImage(image.id)); - }, [props.context, props.onQuery, setValue, props.pastedImages]); + } + }, [props.context, props.onQuery, setValue]); const onKeyDown = React.useCallback((event: React.KeyboardEvent) => { if (!props.isEnabled) { @@ -667,13 +633,8 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement, props.onOpenContextElement); return ( -
- {} +
+ {props.showSuggestions !== false && } {props.showChangeSet && changeSetUI?.elements && } @@ -681,9 +642,6 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu
{nls.localizeByDefault('Ask a question')}
- {props.pastedImages.length > 0 && - - } {props.context && props.context.length > 0 && } @@ -877,6 +835,7 @@ function buildContextUI( } return { context: context.map((element, index) => ({ + variable: element, name: labelProvider.getName(element), iconClass: labelProvider.getIcon(element), nameClass: element.variable.name, @@ -890,6 +849,7 @@ function buildContextUI( interface ChatContextUI { context: { + variable: AIVariableResolutionRequest, name: string; iconClass: string; nameClass: string; @@ -903,20 +863,45 @@ interface ChatContextUI { const ChatContext: React.FunctionComponent = ({ context }) => (
    - {context.map((element, index) => ( -
  • element.open?.()}> -
    -
    - - {element.name} - - - {element.additionalInfo} - + {context.map((element, index) => { + if (ImageContextVariable.isImageContextRequest(element.variable)) { + const variable = ImageContextVariable.parseRequest(element.variable)!; + return
  • element.open?.()}> +
    +
    +
    + + {variable.name ?? variable.wsRelativePath?.split('/').pop()} + + + {element.additionalInfo} + +
    + { e.stopPropagation(); element.delete(); }} /> +
    +
    +
    + {variable.name} +
    +
    +
  • ; + } + return
  • element.open?.()}> +
    +
    +
    + + {element.name} + + + {element.additionalInfo} + +
    + { e.stopPropagation(); element.delete(); }} />
    - { e.stopPropagation(); element.delete(); }} /> -
  • - ))} + ; + })}
); diff --git a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx index 546638f1b90ce..93e6c03eda83d 100644 --- a/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx @@ -24,13 +24,14 @@ import { EditableChatRequestModel, ParsedChatRequestAgentPart, ParsedChatRequestVariablePart, - type ChatHierarchyBranch, type ChatRequest, + type ChatHierarchyBranch, } from '@theia/ai-chat'; -import { AIVariableService, LLMImageData } from '@theia/ai-core'; +import { AIVariableService } from '@theia/ai-core'; import { AIActivationService } from '@theia/ai-core/lib/browser'; import { CommandRegistry, ContributionProvider, Disposable, DisposableCollection, Emitter } from '@theia/core'; import { + codicon, CompositeTreeNode, ContextMenuRenderer, HoverService, @@ -43,7 +44,6 @@ import { TreeProps, TreeWidget, Widget, - codicon, type ReactWidget } from '@theia/core/lib/browser'; import { nls } from '@theia/core/lib/common/nls'; @@ -56,9 +56,9 @@ import { } from '@theia/core/shared/inversify'; import * as React from '@theia/core/shared/react'; import { ChatNodeToolbarActionContribution } from '../chat-node-toolbar-action-contribution'; -import { ProgressMessage } from '../chat-progress-message'; import { ChatResponsePartRenderer } from '../chat-response-part-renderer'; import { useMarkdownRendering } from '../chat-response-renderer/markdown-part-renderer'; +import { ProgressMessage } from '../chat-progress-message'; import { AIChatTreeInputFactory, type AIChatTreeInputWidget } from './chat-view-tree-input-widget'; // TODO Instead of directly operating on the ChatRequestModel we could use an intermediate view model @@ -550,8 +550,6 @@ const ChatRequestRender = ( provideChatInputWidget: () => ReactWidget | undefined, }) => { const parts = node.request.message.parts; - const images = node.request.context.images || []; - if (EditableChatRequestModel.isEditing(node.request)) { const widget = provideChatInputWidget(); if (widget) { @@ -626,20 +624,6 @@ const ChatRequestRender = ( } })}
-            {images.length > 0 && (
-                <div className="theia-RequestNode-Images">
-                    {images.map((img, index) => (
-                        <div key={index} className="theia-RequestNode-ImageContainer">
-                            {LLMImageData.isBase64ImageData(img) ?
-                                <img className="theia-RequestNode-Image" src={`data:${img.mediaType};base64,${img.imageData}`} alt={`Image ${index + 1}`} /> : undefined}
-                        </div>
-                    ))}
-                </div>
-            )}
             {renderFooter()}
); diff --git a/packages/ai-chat-ui/src/browser/chat-view-widget.tsx b/packages/ai-chat-ui/src/browser/chat-view-widget.tsx index e3dd749fffb04..db6577fbcb1d4 100644 --- a/packages/ai-chat-ui/src/browser/chat-view-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-view-widget.tsx @@ -21,7 +21,7 @@ import { inject, injectable, postConstruct } from '@theia/core/shared/inversify' import { AIChatInputWidget } from './chat-input-widget'; import { ChatViewTreeWidget } from './chat-tree-view/chat-view-tree-widget'; import { AIActivationService } from '@theia/ai-core/lib/browser/ai-activation-service'; -import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core'; +import { AIVariableResolutionRequest } from '@theia/ai-core'; import { ProgressBarFactory } from '@theia/core/lib/browser/progress-bar-factory'; import { FrontendVariableService } from '@theia/ai-core/lib/browser'; @@ -177,21 +177,10 @@ export class ChatViewWidget extends BaseWidget implements ExtractableWidget, Sta return this.onStateChangedEmitter.event; } - private isEmptyQuery(query?: string | ChatRequest): boolean { - if (query === undefined) { - return true; - } - if (typeof query === 'string') { - return query.length === 0; - } - return (query.text === undefined || query.text?.length === 0) && - (query.images === undefined || query.images?.length === 0); - } - - protected async onQuery(query?: string | ChatRequest, imageData?: LLMImageData[]): Promise { - if (!query || this.isEmptyQuery(query) && (!imageData || imageData.length === 0)) { return; } + protected async onQuery(query?: string | ChatRequest): Promise { + const chatRequest: ChatRequest = !query ? { text: '' } : typeof query === 'string' ? { text: query } : { ...query }; + if (chatRequest.text.length === 0) { return; } - const chatRequest: ChatRequest = typeof query === 'string' ? 
{ text: query, images: imageData } : { ...query }; const requestProgress = await this.chatService.sendRequest(this.chatSession.id, chatRequest); requestProgress?.responseCompleted.then(responseModel => { if (responseModel.isError) { diff --git a/packages/ai-chat-ui/src/browser/style/index.css b/packages/ai-chat-ui/src/browser/style/index.css index 35ead12dcd4d9..e3f7c645f5ef3 100644 --- a/packages/ai-chat-ui/src/browser/style/index.css +++ b/packages/ai-chat-ui/src/browser/style/index.css @@ -7,8 +7,8 @@ flex: 1; } -.chat-input-widget > .ps__rail-x, -.chat-input-widget > .ps__rail-y { +.chat-input-widget>.ps__rail-x, +.chat-input-widget>.ps__rail-y { display: none !important; } @@ -23,7 +23,7 @@ overflow-wrap: break-word; } -div:last-child > .theia-ChatNode { +div:last-child>.theia-ChatNode { border: none; } @@ -59,6 +59,7 @@ div:last-child > .theia-ChatNode { } @keyframes dots { + 0%, 20% { content: ""; @@ -121,7 +122,7 @@ div:last-child > .theia-ChatNode { padding-inline-start: 1rem; } -.theia-ChatNode li > p { +.theia-ChatNode li>p { margin-top: 0; margin-bottom: 0; } @@ -135,7 +136,7 @@ div:last-child > .theia-ChatNode { font-size: var(--theia-code-font-size); } -.theia-RequestNode > p div { +.theia-RequestNode>p div { display: inline; } @@ -155,12 +156,6 @@ div:last-child > .theia-ChatNode { justify-content: flex-end; gap: 2px; } -.theia-RequestNode-Images { - display: flex; - flex-wrap: wrap; - gap: 8px; - margin-top: 8px; -} .theia-RequestNode-Footer .item { opacity: var(--theia-mod-disabled-opacity); @@ -175,19 +170,6 @@ div:last-child > .theia-ChatNode { cursor: pointer; opacity: 1; } -.theia-RequestNode-ImageContainer { - border: var(--theia-border-width) solid var(--theia-dropdown-border); - border-radius: 4px; - overflow: hidden; - height: 120px; - width: 140px; -} - -.theia-RequestNode-Image { - width: 100%; - height: 100%; - object-fit: contain; -} .theia-RequestNode-Footer :not(.item.enabled) .action-label { background: transparent; @@ -219,22 +201,28 @@ div:last-child > .theia-ChatNode { margin: 0; display: flex; flex-wrap: wrap; + align-items: center; gap: 6px; } .theia-ChatInput-ChatContext-Element { display: flex; - align-items: center; + flex-direction: column; border-radius: calc(var(--theia-ui-padding) * 2 / 3); border: var(--theia-border-width) solid var(--theia-dropdown-border); padding: 2px 4px 2px 6px; - height: 18px; line-height: 16px; min-width: 0; user-select: none; cursor: pointer; } +.theia-ChatInput-ChatContext-Row { + display: flex; + align-items: center; + width: 100%; +} + .theia-ChatInput-ChatContext-labelParts { flex: 1; min-width: 0; @@ -370,8 +358,7 @@ div:last-child > .theia-ChatNode { text-align: center; } -.theia-ChatInput-ChangeSet-List - .theia-ChatInput-ChangeSet-Icon.codicon::before { +.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-Icon.codicon::before { font-size: var(--theia-ui-font-size1); } @@ -388,8 +375,7 @@ div:last-child > .theia-ChatNode { color: var(--theia-disabledForeground); } -.theia-ChatInput-ChangeSet-List - .theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon { +.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon { font-size: var(--theia-ui-font-size0) px; margin-left: 4px; } @@ -516,14 +502,6 @@ div:last-child > .theia-ChatNode { padding-left: 8px !important; } -/* Image Preview Styles */ -.theia-ChatInput-ImagePreview { - display: flex; - flex-wrap: wrap; - gap: 8px; - padding: 8px; - border-bottom: var(--theia-border-width) solid var(--theia-dropdown-border); -} 
.theia-ChatInput-ImagePreview-Item { position: relative; @@ -534,37 +512,21 @@ div:last-child > .theia-ChatNode { width: 120px; } -.theia-ChatInput-ImagePreview-Item img { +.theia-ChatInput-ChatContext-ImageRow { + margin-top: 4px; width: 100%; - height: 100%; - object-fit: contain; -} - -.theia-ChatInput-ImagePreview-Actions { - position: absolute; - top: 2px; - right: 2px; - background-color: rgba(0, 0, 0, 0.5); - border-radius: 4px; display: flex; - align-items: center; justify-content: center; - z-index: 10; } -.theia-ChatInput-ImagePreview-Actions .action { - width: 20px; - height: 20px; - display: flex; - align-items: center; - justify-content: center; - cursor: pointer; - color: white; +.theia-ChatInput-ImageContext-Element { + min-width: 150px; } -.theia-ChatInput-ImagePreview-Actions .action:hover { - background-color: rgba(255, 255, 255, 0.2); - border-radius: 4px; +.theia-ChatInput-ImagePreview-Item img { + width: 100%; + height: 100%; + object-fit: contain; } .theia-ChatInputOptions { @@ -657,8 +619,7 @@ div:last-child > .theia-ChatNode { display: flex; flex-direction: column; gap: 8px; - border: var(--theia-border-width) solid - var(--theia-sideBarSectionHeader-border); + border: var(--theia-border-width) solid var(--theia-sideBarSectionHeader-border); padding: 8px 12px 12px; border-radius: 5px; margin: 0 0 8px 0; @@ -797,6 +758,7 @@ details[open].collapsible-arguments .collapsible-arguments-summary { margin-bottom: 10px; height: calc(100% - 50px); } + .monaco-session-settings-dialog { flex: 1; min-height: 350px; @@ -806,6 +768,7 @@ details[open].collapsible-arguments .collapsible-arguments-summary { border: 1px solid var(--theia-editorWidget-border); margin-bottom: 10px; } + .session-settings-error { color: var(--theia-errorForeground); min-height: 1em; diff --git a/packages/ai-chat/src/browser/ai-chat-frontend-module.ts b/packages/ai-chat/src/browser/ai-chat-frontend-module.ts index a614eafcf98ad..ea96e830fbadb 100644 --- a/packages/ai-chat/src/browser/ai-chat-frontend-module.ts +++ b/packages/ai-chat/src/browser/ai-chat-frontend-module.ts @@ -52,6 +52,7 @@ import { TaskContextVariableLabelProvider } from './task-context-variable-label- import { TaskContextService, TaskContextStorageService } from './task-context-service'; import { InMemoryTaskContextStorage } from './task-context-storage-service'; import { AIChatFrontendContribution } from './ai-chat-frontend-contribution'; +import { ImageContextVariableContribution } from './image-context-variable-contribution'; export default new ContainerModule(bind => { bindContributionProvider(bind, Agent); @@ -129,6 +130,10 @@ export default new ContainerModule(bind => { bind(TaskContextVariableLabelProvider).toSelf().inSingletonScope(); bind(LabelProviderContribution).toService(TaskContextVariableLabelProvider); + bind(ImageContextVariableContribution).toSelf().inSingletonScope(); + bind(AIVariableContribution).toService(ImageContextVariableContribution); + bind(LabelProviderContribution).toService(ImageContextVariableContribution); + bind(TaskContextService).toSelf().inSingletonScope(); bind(InMemoryTaskContextStorage).toSelf().inSingletonScope(); bind(TaskContextStorageService).toService(InMemoryTaskContextStorage); diff --git a/packages/ai-chat/src/browser/file-chat-variable-contribution.ts b/packages/ai-chat/src/browser/file-chat-variable-contribution.ts index 6bbcf1a6f2a02..4e45d9a03a406 100644 --- a/packages/ai-chat/src/browser/file-chat-variable-contribution.ts +++ 
b/packages/ai-chat/src/browser/file-chat-variable-contribution.ts @@ -24,6 +24,7 @@ import { FileQuickPickItem, QuickFileSelectService } from '@theia/file-search/li import { WorkspaceService } from '@theia/workspace/lib/browser'; import { FileService } from '@theia/filesystem/lib/browser/file-service'; import { VARIABLE_ADD_CONTEXT_COMMAND } from './ai-chat-frontend-contribution'; +import { IMAGE_CONTEXT_VARIABLE, ImageContextVariable } from '../common/image-context-variable'; @injectable() export class FileChatVariableContribution implements FrontendVariableContribution { @@ -41,6 +42,7 @@ export class FileChatVariableContribution implements FrontendVariableContributio registerVariables(service: FrontendVariableService): void { service.registerArgumentPicker(FILE_VARIABLE, this.triggerArgumentPicker.bind(this)); + service.registerArgumentPicker(IMAGE_CONTEXT_VARIABLE, this.imageArgumentPicker.bind(this)); service.registerArgumentCompletionProvider(FILE_VARIABLE, this.provideArgumentCompletionItems.bind(this)); service.registerDropHandler(this.handleDrop.bind(this)); } @@ -68,6 +70,57 @@ export class FileChatVariableContribution implements FrontendVariableContributio }); } + protected async imageArgumentPicker(): Promise { + const quickPick = this.quickInputService.createQuickPick(); + quickPick.title = 'Select an image file'; + + // Get all files and filter only image files + const allPicks = await this.quickFileSelectService.getPicks(); + quickPick.items = allPicks.filter(item => { + if (FileQuickPickItem.is(item)) { + return this.isImageFile(item.uri.path.toString()); + } + return false; + }); + + const updateItems = async (value: string) => { + const filteredPicks = await this.quickFileSelectService.getPicks(value, CancellationToken.None); + quickPick.items = filteredPicks.filter(item => { + if (FileQuickPickItem.is(item)) { + return this.isImageFile(item.uri.path.toString()); + } + return false; + }); + }; + + const onChangeListener = quickPick.onDidChangeValue(updateItems); + quickPick.show(); + + return new Promise(resolve => { + quickPick.onDispose(onChangeListener.dispose); + quickPick.onDidAccept(async () => { + const selectedItem = quickPick.selectedItems[0]; + if (selectedItem && FileQuickPickItem.is(selectedItem)) { + quickPick.dispose(); + const filePath = await this.wsService.getWorkspaceRelativePath(selectedItem.uri); + const fileName = selectedItem.uri.displayName; + const base64Data = await this.fileToBase64(selectedItem.uri); + const mimeType = this.getMimeTypeFromExtension(selectedItem.uri.path.toString()); + + // Create the argument string in the required format + const imageVarArgs: ImageContextVariable = { + name: fileName, + wsRelativePath: filePath, + data: base64Data, + mimeType: mimeType + }; + + resolve(ImageContextVariable.createArgString(imageVarArgs)); + } + }); + }); + } + protected async provideArgumentCompletionItems( model: monaco.editor.ITextModel, position: monaco.Position, @@ -106,6 +159,51 @@ export class FileChatVariableContribution implements FrontendVariableContributio ); } + /** + * Checks if a file is an image based on its extension. + */ + protected isImageFile(filePath: string): boolean { + const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp']; + const extension = filePath.toLowerCase().substring(filePath.lastIndexOf('.')); + return imageExtensions.includes(extension); + } + + /** + * Determines the MIME type based on file extension. 
+ */ + protected getMimeTypeFromExtension(filePath: string): string { + const extension = filePath.toLowerCase().substring(filePath.lastIndexOf('.')); + const mimeTypes: { [key: string]: string } = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.bmp': 'image/bmp', + '.svg': 'image/svg+xml', + '.webp': 'image/webp' + }; + return mimeTypes[extension] || 'application/octet-stream'; + } + + /** + * Converts a file to base64 data URL. + */ + protected async fileToBase64(uri: URI): Promise { + try { + const fileContent = await this.fileService.readFile(uri); + // Convert the array buffer to base64 + const uint8Array = new Uint8Array(fileContent.value.buffer); + let binary = ''; + for (let i = 0; i < uint8Array.length; i++) { + binary += String.fromCharCode(uint8Array[i]); + } + return btoa(binary); + } catch (error) { + console.error('Error reading file content:', error); + return ''; + } + } + protected async handleDrop(event: DragEvent, _: AIVariableContext): Promise { const data = event.dataTransfer?.getData('selected-tree-nodes'); if (!data) { @@ -126,11 +224,25 @@ export class FileChatVariableContribution implements FrontendVariableContributio const uri = URI.fromFilePath(filePath); if (await this.fileService.exists(uri)) { const wsRelativePath = await this.wsService.getWorkspaceRelativePath(uri); - variables.push({ - variable: FILE_VARIABLE, - arg: wsRelativePath - }); - texts.push(`${PromptText.VARIABLE_CHAR}${FILE_VARIABLE.name}${PromptText.VARIABLE_SEPARATOR_CHAR}${wsRelativePath}`); + const fileName = uri.displayName; + + if (this.isImageFile(filePath)) { + const base64Data = await this.fileToBase64(uri); + const mimeType = this.getMimeTypeFromExtension(filePath); + variables.push(ImageContextVariable.createRequest({ + [ImageContextVariable.name]: fileName, + [ImageContextVariable.wsRelativePath]: wsRelativePath, + [ImageContextVariable.data]: base64Data, + [ImageContextVariable.mimeType]: mimeType + })); + // we do not want to push a text for image variables + } else { + variables.push({ + variable: FILE_VARIABLE, + arg: wsRelativePath + }); + texts.push(`${PromptText.VARIABLE_CHAR}${FILE_VARIABLE.name}${PromptText.VARIABLE_SEPARATOR_CHAR}${wsRelativePath}`); + } } } diff --git a/packages/ai-chat/src/browser/image-context-variable-contribution.ts b/packages/ai-chat/src/browser/image-context-variable-contribution.ts new file mode 100644 index 0000000000000..cb0587ed73655 --- /dev/null +++ b/packages/ai-chat/src/browser/image-context-variable-contribution.ts @@ -0,0 +1,153 @@ +// ***************************************************************************** +// Copyright (C) 2025 EclipseSource GmbH and others. +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0. +// +// This Source Code may also be made available under the following Secondary +// Licenses when the conditions for such availability set forth in the Eclipse +// Public License v. 2.0 are satisfied: GNU General Public License, version 2 +// with the GNU Classpath Exception which is available at +// https://www.gnu.org/software/classpath/license.html. 
+// +// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 +// ***************************************************************************** + +import { + AIVariableContext, AIVariableContribution, + AIVariableOpener, AIVariableResolutionRequest, AIVariableResolver, ResolvedAIContextVariable +} from '@theia/ai-core'; +import { FrontendVariableService, AIVariablePasteResult } from '@theia/ai-core/lib/browser'; +import { Path, URI } from '@theia/core'; +import { LabelProvider, LabelProviderContribution, open, OpenerService } from '@theia/core/lib/browser'; +import { inject, injectable } from '@theia/core/shared/inversify'; +import { FileService } from '@theia/filesystem/lib/browser/file-service'; +import { WorkspaceService } from '@theia/workspace/lib/browser'; +import { IMAGE_CONTEXT_VARIABLE, ImageContextVariable, ImageContextVariableRequest } from '../common/image-context-variable'; + +@injectable() +export class ImageContextVariableContribution implements AIVariableContribution, AIVariableResolver, AIVariableOpener, LabelProviderContribution { + @inject(FileService) + protected readonly fileService: FileService; + + @inject(WorkspaceService) + protected readonly wsService: WorkspaceService; + + @inject(OpenerService) + protected readonly openerService: OpenerService; + + @inject(LabelProvider) + protected readonly labelProvider: LabelProvider; + + registerVariables(service: FrontendVariableService): void { + service.registerResolver(IMAGE_CONTEXT_VARIABLE, this); + service.registerOpener(IMAGE_CONTEXT_VARIABLE, this); + service.registerPasteHandler(this.handlePaste.bind(this)); + } + + async canResolve(request: AIVariableResolutionRequest, _: AIVariableContext): Promise { + return ImageContextVariable.isImageContextRequest(request) ? 1 : 0; + } + + async resolve(request: AIVariableResolutionRequest, _: AIVariableContext): Promise { + return ImageContextVariable.resolve(request as ImageContextVariableRequest); + } + + async canOpen(request: AIVariableResolutionRequest, context: AIVariableContext): Promise { + return ImageContextVariable.isImageContextRequest(request) && !!ImageContextVariable.parseRequest(request)?.wsRelativePath ? 1 : 0; + } + + async open(request: ImageContextVariableRequest, context: AIVariableContext): Promise { + const uri = await this.toUri(request); + if (!uri) { + throw new Error('Unable to resolve URI for request.'); + } + await open(this.openerService, uri); + } + + protected async toUri(request: ImageContextVariableRequest): Promise { + const variable = ImageContextVariable.parseRequest(request); + return variable?.wsRelativePath ? 
this.makeAbsolute(variable.wsRelativePath) : undefined; + } + + async handlePaste(event: ClipboardEvent, context: AIVariableContext): Promise { + if (!event.clipboardData?.items) { return undefined; } + + const variables: AIVariableResolutionRequest[] = []; + + for (const item of event.clipboardData.items) { + if (item.type.startsWith('image/')) { + const blob = item.getAsFile(); + if (blob) { + try { + const dataUrl = await this.readFileAsDataURL(blob); + // Extract the base64 data by removing the data URL prefix + // Format is like: data:image/png;base64,BASE64DATA + const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1); + variables.push(ImageContextVariable.createRequest({ + data: imageData, + name: blob.name || `pasted-image-${Date.now()}.png`, + mimeType: blob.type + })); + } catch (error) { + console.error('Failed to process pasted image:', error); + } + } + } + } + + return variables.length > 0 ? { variables } : undefined; + } + + private readFileAsDataURL(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = e => { + if (!e.target?.result) { + reject(new Error('Failed to read file as data URL')); + return; + } + resolve(e.target.result as string); + }; + reader.onerror = () => reject(reader.error); + reader.readAsDataURL(blob); + }); + } + + protected async makeAbsolute(pathStr: string): Promise { + const path = new Path(Path.normalizePathSeparator(pathStr)); + if (!path.isAbsolute) { + const workspaceRoots = this.wsService.tryGetRoots(); + const wsUris = workspaceRoots.map(root => root.resource.resolve(path)); + for (const uri of wsUris) { + if (await this.fileService.exists(uri)) { + return uri; + } + } + } + const argUri = new URI(pathStr); + if (await this.fileService.exists(argUri)) { + return argUri; + } + return undefined; + } + + canHandle(element: object): number { + return ImageContextVariable.isImageContextRequest(element) ? 10 : -1; + } + + getIcon(element: ImageContextVariableRequest): string | undefined { + const path = ImageContextVariable.parseArg(element.arg).wsRelativePath; + return path ? this.labelProvider.getIcon(new URI(path)) : undefined; + } + + getName(element: ImageContextVariableRequest): string | undefined { + return ImageContextVariable.parseArg(element.arg).name; + } + + getDetails(element: ImageContextVariableRequest): string | undefined { + const path = ImageContextVariable.parseArg(element.arg).wsRelativePath; + return path ? this.labelProvider.getDetails(new URI(path)) : '[pasted]'; + } +} diff --git a/packages/ai-chat/src/common/chat-agents.ts b/packages/ai-chat/src/common/chat-agents.ts index c51c0e1981ef9..4a11124e33349 100644 --- a/packages/ai-chat/src/common/chat-agents.ts +++ b/packages/ai-chat/src/common/chat-agents.ts @@ -65,6 +65,7 @@ import { import { ChatToolRequest, ChatToolRequestService } from './chat-tool-request-service'; import { parseContents } from './parse-contents'; import { DefaultResponseContentFactory, ResponseContentMatcher, ResponseContentMatcherProvider } from './response-content-matcher'; +import { ImageContextVariable } from './image-context-variable'; /** * System message content, enriched with function descriptions. 
@@ -260,9 +261,16 @@ export abstract class AbstractChatAgent implements ChatAgent { text: text, }); } - request.context.images?.forEach(image => { - messages.push({ actor: 'user', type: 'image', image }); - }); + request.context.variables + .filter(variable => ImageContextVariable.isResolvedImageContext(variable)) + .map(variable => ImageContextVariable.parseResolved(variable)) + .filter(content => content !== undefined) + .map(content => messages.push({ + actor: 'user', type: 'image', image: { + base64data: content!.data, + mimeType: content!.mimeType + } + })); if (request.response.isComplete || includeResponseInProgress) { const responseMessages: LanguageModelMessage[] = request.response.response.content.flatMap(c => { if (ChatResponseContent.hasToLanguageModelMessage(c)) { diff --git a/packages/ai-chat/src/common/chat-model.ts b/packages/ai-chat/src/common/chat-model.ts index 7eb00765ca5b5..304c1abe05b79 100644 --- a/packages/ai-chat/src/common/chat-model.ts +++ b/packages/ai-chat/src/common/chat-model.ts @@ -22,7 +22,6 @@ import { AIVariableResolutionRequest, LanguageModelMessage, - LLMImageData, ResolvedAIContextVariable, TextMessage, ThinkingMessage, @@ -236,12 +235,10 @@ export interface ChatRequest { */ readonly referencedRequestId?: string; readonly variables?: readonly AIVariableResolutionRequest[]; - readonly images?: LLMImageData[]; } export interface ChatContext { variables: ResolvedAIContextVariable[]; - images?: LLMImageData[]; } export interface ChatRequestModel { diff --git a/packages/ai-chat/src/common/chat-request-parser.ts b/packages/ai-chat/src/common/chat-request-parser.ts index db1f1ed314413..b27c53e597410 100644 --- a/packages/ai-chat/src/common/chat-request-parser.ts +++ b/packages/ai-chat/src/common/chat-request-parser.ts @@ -104,74 +104,75 @@ export class ChatRequestParserImpl implements ChatRequestParser { const parts: ParsedChatRequestPart[] = []; const variables = new Map(); const toolRequests = new Map(); - if (request.text) { - const message = request.text; - for (let i = 0; i < message.length; i++) { - const previousChar = message.charAt(i - 1); - const char = message.charAt(i); - let newPart: ParsedChatRequestPart | undefined; + if (!request.text) { + return { parts, toolRequests, variables }; + } + const message = request.text; + for (let i = 0; i < message.length; i++) { + const previousChar = message.charAt(i - 1); + const char = message.charAt(i); + let newPart: ParsedChatRequestPart | undefined; - if (previousChar.match(/\s/) || i === 0) { - if (char === chatFunctionLeader) { - const functionPart = this.tryToParseFunction( - message.slice(i), - i - ); - newPart = functionPart; - if (functionPart) { - toolRequests.set(functionPart.toolRequest.id, functionPart.toolRequest); - } - } else if (char === chatVariableLeader) { - const variablePart = this.tryToParseVariable( - message.slice(i), - i, - parts - ); - newPart = variablePart; - if (variablePart) { - const variable = this.variableService.getVariable(variablePart.variableName); - if (variable) { - variables.set(variable.name, variable); - } + if (previousChar.match(/\s/) || i === 0) { + if (char === chatFunctionLeader) { + const functionPart = this.tryToParseFunction( + message.slice(i), + i + ); + newPart = functionPart; + if (functionPart) { + toolRequests.set(functionPart.toolRequest.id, functionPart.toolRequest); + } + } else if (char === chatVariableLeader) { + const variablePart = this.tryToParseVariable( + message.slice(i), + i, + parts + ); + newPart = variablePart; + if (variablePart) { + 
const variable = this.variableService.getVariable(variablePart.variableName); + if (variable) { + variables.set(variable.name, variable); } - } else if (char === chatAgentLeader) { - newPart = this.tryToParseAgent( - message.slice(i), - i, - parts, - location - ); } + } else if (char === chatAgentLeader) { + newPart = this.tryToParseAgent( + message.slice(i), + i, + parts, + location + ); } + } - if (newPart) { - if (i !== 0) { - // Insert a part for all the text we passed over, then insert the new parsed part - const previousPart = parts.at(-1); - const previousPartEnd = previousPart?.range.endExclusive ?? 0; - parts.push( - new ParsedChatRequestTextPart( - offsetRange(previousPartEnd, i), - message.slice(previousPartEnd, i) - ) - ); - } - - parts.push(newPart); + if (newPart) { + if (i !== 0) { + // Insert a part for all the text we passed over, then insert the new parsed part + const previousPart = parts.at(-1); + const previousPartEnd = previousPart?.range.endExclusive ?? 0; + parts.push( + new ParsedChatRequestTextPart( + offsetRange(previousPartEnd, i), + message.slice(previousPartEnd, i) + ) + ); } - } - const lastPart = parts.at(-1); - const lastPartEnd = lastPart?.range.endExclusive ?? 0; - if (lastPartEnd < message.length) { - parts.push( - new ParsedChatRequestTextPart( - offsetRange(lastPartEnd, message.length), - message.slice(lastPartEnd, message.length) - ) - ); + parts.push(newPart); } } + + const lastPart = parts.at(-1); + const lastPartEnd = lastPart?.range.endExclusive ?? 0; + if (lastPartEnd < message.length) { + parts.push( + new ParsedChatRequestTextPart( + offsetRange(lastPartEnd, message.length), + message.slice(lastPartEnd, message.length) + ) + ); + } return { parts, toolRequests, variables }; } diff --git a/packages/ai-chat/src/common/chat-service.ts b/packages/ai-chat/src/common/chat-service.ts index 2c47eb7f78d83..c9d85c6f95731 100644 --- a/packages/ai-chat/src/common/chat-service.ts +++ b/packages/ai-chat/src/common/chat-service.ts @@ -234,7 +234,6 @@ export class ChatServiceImpl implements ChatService { const resolutionContext: ChatSessionContext = { model: session.model }; const resolvedContext = await this.resolveChatContext(request.variables ?? session.model.context.getVariables(), resolutionContext); - resolvedContext.images = request.images; const parsedRequest = await this.chatRequestParser.parseChatRequest(request, session.model.location, resolvedContext); const agent = this.getAgent(parsedRequest, session); diff --git a/packages/ai-chat/src/common/image-context-variable.ts b/packages/ai-chat/src/common/image-context-variable.ts new file mode 100644 index 0000000000000..c762f8b5eee9e --- /dev/null +++ b/packages/ai-chat/src/common/image-context-variable.ts @@ -0,0 +1,116 @@ +// ***************************************************************************** +// Copyright (C) 2025 EclipseSource GmbH and others. +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0. +// +// This Source Code may also be made available under the following Secondary +// Licenses when the conditions for such availability set forth in the Eclipse +// Public License v. 2.0 are satisfied: GNU General Public License, version 2 +// with the GNU Classpath Exception which is available at +// https://www.gnu.org/software/classpath/license.html. 
+// +// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 +// ***************************************************************************** + +import { + AIVariable, + AIVariableResolutionRequest, + ResolvedAIContextVariable +} from '@theia/ai-core'; + +export const IMAGE_CONTEXT_VARIABLE: AIVariable = { + id: 'imageContext', + description: 'Provides context information for an image', + name: 'imageContext', + label: 'Image File', + iconClasses: ['codicon', 'codicon-file-media'], + isContextVariable: true, + args: [ + { name: 'name', description: 'The name of the image file if available.', isOptional: true }, + { name: 'wsRelativePath', description: 'The workspace-relative path of the image file if available.', isOptional: true }, + { name: 'data', description: 'The image data in base64.' }, + { name: 'mimeType', description: 'The mimetype of the image.' } + ] +}; + +export interface ImageContextVariable { + name?: string; + wsRelativePath?: string; + data: string; + mimeType: string; +} + +export interface ImageContextVariableRequest extends AIVariableResolutionRequest { + variable: typeof IMAGE_CONTEXT_VARIABLE; + arg: string; +} + +export namespace ImageContextVariable { + export const name = 'name'; + export const wsRelativePath = 'wsRelativePath'; + export const data = 'data'; + export const mimeType = 'mimeType'; + + export function isImageContextRequest(request: object): request is ImageContextVariableRequest { + return AIVariableResolutionRequest.is(request) && request.variable.id === IMAGE_CONTEXT_VARIABLE.id && !!request.arg; + } + + export function isResolvedImageContext(resolved: object): resolved is ResolvedAIContextVariable & { arg: string } { + return ResolvedAIContextVariable.is(resolved) && resolved.variable.id === IMAGE_CONTEXT_VARIABLE.id && !!resolved.arg; + } + + export function parseRequest(request: AIVariableResolutionRequest): undefined | ImageContextVariable { + return isImageContextRequest(request) ? parseArg(request.arg) : undefined; + } + + export function resolve(request: ImageContextVariableRequest): ResolvedAIContextVariable { + const args = parseArg(request.arg); + return { + ...request, + value: args.wsRelativePath ?? args.name ?? 'Image', + contextValue: args.wsRelativePath ?? args.name ?? 'Image' + }; + } + + export function parseResolved(resolved: ResolvedAIContextVariable): undefined | ImageContextVariable { + return isResolvedImageContext(resolved) ? 
parseArg(resolved.arg) : undefined; + } + + export function createRequest(content: ImageContextVariable): ImageContextVariableRequest { + return { + variable: IMAGE_CONTEXT_VARIABLE, + arg: createArgString(content) + }; + } + + export function createArgString(args: ImageContextVariable): string { + return JSON.stringify(args); + } + + export function parseArg(argString: string): ImageContextVariable { + const result: Partial = {}; + + if (!argString) { + throw new Error('Invalid argument string: empty string'); + } + + try { + const parsed = JSON.parse(argString) as Partial; + Object.assign(result, parsed); + } catch (error) { + throw new Error(`Failed to parse JSON argument string: ${error.message}`); + } + + if (!result.data) { + throw new Error(`Missing required argument: ${data}`); + } + + if (!result.mimeType) { + throw new Error(`Missing required argument: ${mimeType}`); + } + + return result as ImageContextVariable; + } +} diff --git a/packages/ai-core/src/browser/frontend-variable-service.ts b/packages/ai-core/src/browser/frontend-variable-service.ts index 9ba46764cf68d..66449578ddfde 100644 --- a/packages/ai-core/src/browser/frontend-variable-service.ts +++ b/packages/ai-core/src/browser/frontend-variable-service.ts @@ -37,6 +37,13 @@ export interface AIVariableDropResult { text?: string }; +export type AIVariablePasteHandler = (event: ClipboardEvent, context: AIVariableContext) => Promise; + +export interface AIVariablePasteResult { + variables: AIVariableResolutionRequest[], + text?: string +}; + export interface AIVariableCompletionContext { /** Portion of user input to be used for filtering completion candidates. */ userInput: string; @@ -79,6 +86,10 @@ export interface FrontendVariableService extends AIVariableService { unregisterDropHandler(handler: AIVariableDropHandler): void; getDropResult(event: DragEvent, context: AIVariableContext): Promise; + registerPasteHandler(handler: AIVariablePasteHandler): Disposable; + unregisterPasteHandler(handler: AIVariablePasteHandler): void; + getPasteResult(event: ClipboardEvent, context: AIVariableContext): Promise; + registerOpener(variable: AIVariable, opener: AIVariableOpener): Disposable; unregisterOpener(variable: AIVariable, opener: AIVariableOpener): void; getOpener(name: string, arg: string | undefined, context: AIVariableContext): Promise; @@ -92,6 +103,7 @@ export interface FrontendVariableContribution { @injectable() export class DefaultFrontendVariableService extends DefaultAIVariableService implements FrontendApplicationContribution, FrontendVariableService { protected dropHandlers = new Set(); + protected pasteHandlers = new Set(); @inject(MessageService) protected readonly messageService: MessageService; @inject(AIVariableResourceResolver) protected readonly aiResourceResolver: AIVariableResourceResolver; @@ -125,6 +137,30 @@ export class DefaultFrontendVariableService extends DefaultAIVariableService imp return { variables, text }; } + registerPasteHandler(handler: AIVariablePasteHandler): Disposable { + this.pasteHandlers.add(handler); + return Disposable.create(() => this.unregisterPasteHandler(handler)); + } + + unregisterPasteHandler(handler: AIVariablePasteHandler): void { + this.pasteHandlers.delete(handler); + } + + async getPasteResult(event: ClipboardEvent, context: AIVariableContext): Promise { + let text: string | undefined = undefined; + const variables: AIVariableResolutionRequest[] = []; + for (const handler of this.pasteHandlers) { + const result = await handler(event, context); + if (result) { + 
variables.push(...result.variables); + if (text === undefined) { + text = result.text; + } + } + } + return { variables, text }; + } + registerOpener(variable: AIVariable, opener: AIVariableOpener): Disposable { const key = this.getKey(variable.name); if (!this.variables.get(key)) { diff --git a/packages/ai-core/src/common/language-model.ts b/packages/ai-core/src/common/language-model.ts index ce7ff72cf8d93..ad671cfe1520d 100644 --- a/packages/ai-core/src/common/language-model.ts +++ b/packages/ai-core/src/common/language-model.ts @@ -66,22 +66,21 @@ export interface ToolUseMessage { input: unknown; name: string; } -export interface UrlImageData { url: string }; -export interface Base64ImageData { - // base64 encoded image data - imageData: string; - // the media type - mediaType: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; +export type ImageMimeType = 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | 'image/bmp' | 'image/svg+xml' | string & {}; +export interface UrlImageContent { url: string }; +export interface Base64ImageContent { + base64data: string; + mimeType: ImageMimeType; }; -export type LLMImageData = UrlImageData | Base64ImageData; -export namespace LLMImageData { - export const isUrlImage = (obj: LLMImageData): obj is UrlImageData => 'url' in obj; - export const isBase64ImageData = (obj: LLMImageData): obj is Base64ImageData => 'imageData' in obj; +export type ImageContent = UrlImageContent | Base64ImageContent; +export namespace ImageContent { + export const isUrl = (obj: ImageContent): obj is UrlImageContent => 'url' in obj; + export const isBase64 = (obj: ImageContent): obj is Base64ImageContent => 'base64data' in obj && 'mimeType' in obj; } export interface ImageMessage { actor: 'ai' | 'user'; type: 'image'; - image: LLMImageData; + image: ImageContent; } export const isLanguageModelRequestMessage = (obj: unknown): obj is LanguageModelMessage => diff --git a/packages/ai-core/src/common/variable-service.ts b/packages/ai-core/src/common/variable-service.ts index 6a142bb6881a4..1348cef19a961 100644 --- a/packages/ai-core/src/common/variable-service.ts +++ b/packages/ai-core/src/common/variable-service.ts @@ -172,6 +172,7 @@ export interface AIVariableService { getVariable(name: string): Readonly | undefined; getVariables(): Readonly[]; getContextVariables(): Readonly[]; + registerVariable(variable: AIVariable): Disposable; unregisterVariable(name: string): void; readonly onDidChangeVariables: Event; @@ -287,12 +288,19 @@ export class DefaultAIVariableService implements AIVariableService { return this.getVariables().filter(AIContextVariable.is); } - registerResolver(variable: AIVariable, resolver: AIVariableResolver): Disposable { + registerVariable(variable: AIVariable): Disposable { const key = this.getKey(variable.name); if (!this.variables.get(key)) { this.variables.set(key, variable); this.onDidChangeVariablesEmitter.fire(); + return Disposable.create(() => this.unregisterVariable(variable.name)); } + return Disposable.NULL; + } + + registerResolver(variable: AIVariable, resolver: AIVariableResolver): Disposable { + this.registerVariable(variable); + const key = this.getKey(variable.name); const resolvers = this.resolvers.get(key) ?? 
[]; resolvers.push(resolver); this.resolvers.set(key, resolvers); @@ -315,6 +323,7 @@ export class DefaultAIVariableService implements AIVariableService { } registerArgumentPicker(variable: AIVariable, argPicker: AIVariableArgPicker): Disposable { + this.registerVariable(variable); const key = this.getKey(variable.name); this.argPickers.set(key, argPicker); return Disposable.create(() => this.unregisterArgumentPicker(variable, argPicker)); @@ -333,6 +342,7 @@ export class DefaultAIVariableService implements AIVariableService { } registerArgumentCompletionProvider(variable: AIVariable, completionProvider: AIVariableArgCompletionProvider): Disposable { + this.registerVariable(variable); const key = this.getKey(variable.name); this.argCompletionProviders.set(key, completionProvider); return Disposable.create(() => this.unregisterArgumentCompletionProvider(variable, completionProvider)); diff --git a/packages/ai-google/src/node/google-language-model.ts b/packages/ai-google/src/node/google-language-model.ts index d548b7b9d427d..ec6d26b605618 100644 --- a/packages/ai-google/src/node/google-language-model.ts +++ b/packages/ai-google/src/node/google-language-model.ts @@ -24,7 +24,7 @@ import { LanguageModelTextResponse, TokenUsageService, UserRequest, - LLMImageData + ImageContent } from '@theia/ai-core'; import { CancellationToken } from '@theia/core'; import { GoogleGenAI, FunctionCallingConfigMode, FunctionDeclaration, Content, Schema, Part, Modality } from '@google/genai'; @@ -49,8 +49,8 @@ const convertMessageToPart = (message: LanguageModelMessage): Part[] | undefined } else if (LanguageModelMessage.isThinkingMessage(message)) { return [{ thought: true }, { text: message.thinking }]; - } else if (LanguageModelMessage.isImageMessage(message) && LLMImageData.isBase64ImageData(message.image)) { - return [{ inlineData: { data: message.image.imageData, mimeType: message.image.mediaType } }]; + } else if (LanguageModelMessage.isImageMessage(message) && ImageContent.isBase64(message.image)) { + return [{ inlineData: { data: message.image.base64data, mimeType: message.image.mimeType } }]; } }; /** diff --git a/packages/ai-openai/src/node/openai-language-model.ts b/packages/ai-openai/src/node/openai-language-model.ts index 07226108293e2..e1c725e0452ab 100644 --- a/packages/ai-openai/src/node/openai-language-model.ts +++ b/packages/ai-openai/src/node/openai-language-model.ts @@ -24,7 +24,7 @@ import { TextMessage, TokenUsageService, UserRequest, - LLMImageData + ImageContent } from '@theia/ai-core'; import { CancellationToken } from '@theia/core'; import { injectable } from '@theia/core/shared/inversify'; @@ -327,8 +327,8 @@ export class OpenAiModelUtils { type: 'image_url', image_url: { url: - LLMImageData.isBase64ImageData(message.image) ? - `data:${message.image.mediaType};base64,${message.image.imageData}` : + ImageContent.isBase64(message.image) ? 
+ `data:${message.image.mimeType};base64,${message.image.base64data}` : message.image.url } }] From c8a2528376973b369073ea751ee80bc8a914cffa Mon Sep 17 00:00:00 2001 From: Philip Langer Date: Thu, 5 Jun 2025 17:03:49 +0200 Subject: [PATCH 5/5] nitpicks --- .../src/browser/chat-input-widget.tsx | 1 - .../ai-chat-ui/src/browser/style/index.css | 23 ++++++++++--------- .../file-chat-variable-contribution.ts | 8 ++++--- packages/ai-chat/src/common/chat-agents.ts | 12 ++++++---- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx index cbfde0c27382b..51c7fb9367115 100644 --- a/packages/ai-chat-ui/src/browser/chat-input-widget.tsx +++ b/packages/ai-chat-ui/src/browser/chat-input-widget.tsx @@ -395,7 +395,6 @@ const ChatInput: React.FunctionComponent = (props: ChatInpu const container = containerRef.current; if (container) { container.addEventListener('paste', handlePaste, true); - return () => { container.removeEventListener('paste', handlePaste, true); }; diff --git a/packages/ai-chat-ui/src/browser/style/index.css b/packages/ai-chat-ui/src/browser/style/index.css index c356d8a874e86..8fd1d7ad092c8 100644 --- a/packages/ai-chat-ui/src/browser/style/index.css +++ b/packages/ai-chat-ui/src/browser/style/index.css @@ -7,8 +7,8 @@ flex: 1; } -.chat-input-widget>.ps__rail-x, -.chat-input-widget>.ps__rail-y { +.chat-input-widget > .ps__rail-x, +.chat-input-widget > .ps__rail-y { display: none !important; } @@ -23,7 +23,7 @@ overflow-wrap: break-word; } -div:last-child>.theia-ChatNode { +div:last-child > .theia-ChatNode { border: none; } @@ -59,7 +59,6 @@ div:last-child>.theia-ChatNode { } @keyframes dots { - 0%, 20% { content: ""; @@ -122,7 +121,7 @@ div:last-child>.theia-ChatNode { padding-inline-start: 1rem; } -.theia-ChatNode li>p { +.theia-ChatNode li > p { margin-top: 0; margin-bottom: 0; } @@ -136,7 +135,7 @@ div:last-child>.theia-ChatNode { font-size: var(--theia-code-font-size); } -.theia-RequestNode>p div { +.theia-RequestNode > p div { display: inline; } @@ -358,7 +357,8 @@ div:last-child>.theia-ChatNode { text-align: center; } -.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-Icon.codicon::before { +.theia-ChatInput-ChangeSet-List + .theia-ChatInput-ChangeSet-Icon.codicon::before { font-size: var(--theia-ui-font-size1); } @@ -375,7 +375,8 @@ div:last-child>.theia-ChatNode { color: var(--theia-disabledForeground); } -.theia-ChatInput-ChangeSet-List .theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon { +.theia-ChatInput-ChangeSet-List + .theia-ChatInput-ChangeSet-AdditionalInfo-SuffixIcon { font-size: var(--theia-ui-font-size0) px; margin-left: 4px; } @@ -502,7 +503,6 @@ div:last-child>.theia-ChatNode { padding-left: 8px !important; } - .theia-ChatInput-ImagePreview-Item { position: relative; border: var(--theia-border-width) solid var(--theia-dropdown-border); @@ -619,7 +619,8 @@ div:last-child>.theia-ChatNode { display: flex; flex-direction: column; gap: 8px; - border: var(--theia-border-width) solid var(--theia-sideBarSectionHeader-border); + border: var(--theia-border-width) solid + var(--theia-sideBarSectionHeader-border); padding: 8px 12px 12px; border-radius: 5px; margin: 0 0 8px 0; @@ -737,7 +738,7 @@ div:last-child>.theia-ChatNode { background: var(--theia-menu-background); border: 1px solid var(--theia-menu-border); border-radius: 4px; - box-shadow: 0 2px 8px rgba(0,0,0,0.15); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); margin: 0; padding: 0; 
list-style: none; diff --git a/packages/ai-chat/src/browser/file-chat-variable-contribution.ts b/packages/ai-chat/src/browser/file-chat-variable-contribution.ts index 4e45d9a03a406..4f03640411081 100644 --- a/packages/ai-chat/src/browser/file-chat-variable-contribution.ts +++ b/packages/ai-chat/src/browser/file-chat-variable-contribution.ts @@ -17,7 +17,7 @@ import { AIVariableContext, AIVariableResolutionRequest, PromptText } from '@theia/ai-core'; import { AIVariableCompletionContext, AIVariableDropResult, FrontendVariableContribution, FrontendVariableService } from '@theia/ai-core/lib/browser'; import { FILE_VARIABLE } from '@theia/ai-core/lib/browser/file-variable-contribution'; -import { CancellationToken, QuickInputService, URI } from '@theia/core'; +import { CancellationToken, ILogger, QuickInputService, URI } from '@theia/core'; import { inject, injectable } from '@theia/core/shared/inversify'; import * as monaco from '@theia/monaco-editor-core'; import { FileQuickPickItem, QuickFileSelectService } from '@theia/file-search/lib/browser/quick-file-select-service'; @@ -40,6 +40,9 @@ export class FileChatVariableContribution implements FrontendVariableContributio @inject(QuickFileSelectService) protected readonly quickFileSelectService: QuickFileSelectService; + @inject(ILogger) + protected readonly logger: ILogger; + registerVariables(service: FrontendVariableService): void { service.registerArgumentPicker(FILE_VARIABLE, this.triggerArgumentPicker.bind(this)); service.registerArgumentPicker(IMAGE_CONTEXT_VARIABLE, this.imageArgumentPicker.bind(this)); @@ -191,7 +194,6 @@ export class FileChatVariableContribution implements FrontendVariableContributio protected async fileToBase64(uri: URI): Promise { try { const fileContent = await this.fileService.readFile(uri); - // Convert the array buffer to base64 const uint8Array = new Uint8Array(fileContent.value.buffer); let binary = ''; for (let i = 0; i < uint8Array.length; i++) { @@ -199,7 +201,7 @@ export class FileChatVariableContribution implements FrontendVariableContributio } return btoa(binary); } catch (error) { - console.error('Error reading file content:', error); + this.logger.error('Error reading file content:', error); return ''; } } diff --git a/packages/ai-chat/src/common/chat-agents.ts b/packages/ai-chat/src/common/chat-agents.ts index d8164c89b1ab3..7bc465f5825de 100644 --- a/packages/ai-chat/src/common/chat-agents.ts +++ b/packages/ai-chat/src/common/chat-agents.ts @@ -255,23 +255,27 @@ export abstract class AbstractChatAgent implements ChatAgent { const requestMessages = model.getRequests().flatMap(request => { const messages: LanguageModelMessage[] = []; const text = request.message.parts.map(part => part.promptText).join(''); - if (text.length !== 0) { + if (text.length > 0) { messages.push({ actor: 'user', type: 'text', text: text, }); } - request.context.variables + const imageMessages = request.context.variables .filter(variable => ImageContextVariable.isResolvedImageContext(variable)) .map(variable => ImageContextVariable.parseResolved(variable)) .filter(content => content !== undefined) - .map(content => messages.push({ - actor: 'user', type: 'image', image: { + .map(content => ({ + actor: 'user' as const, + type: 'image' as const, + image: { base64data: content!.data, mimeType: content!.mimeType } })); + messages.push(...imageMessages); + if (request.response.isComplete || includeResponseInProgress) { const responseMessages: LanguageModelMessage[] = request.response.response.content .filter(c => 
!ErrorChatResponseContent.is(c))
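
For illustration only, a minimal TypeScript sketch of how the pieces introduced in this series fit together: image data captured from a paste or file pick is wrapped in an ImageContextVariable request, and a resolved request is later mapped to the base64 image message that AbstractChatAgent sends to the language model. The import path and the literal payload below are assumptions for the sketch; the helper names match the new image-context-variable.ts module in this patch.

import { ImageContextVariable } from '@theia/ai-chat/lib/common/image-context-variable'; // assumed module path

// Wrap base64 image data in a context variable request (what the paste handler and the
// image quick pick produce). 'data' and 'mimeType' are required; the rest is optional.
const request = ImageContextVariable.createRequest({
    name: 'diagram.png',                // illustrative file name
    wsRelativePath: 'docs/diagram.png', // illustrative workspace-relative path
    data: 'iVBORw0KGgo...',             // placeholder base64 payload
    mimeType: 'image/png'
});

// The arg round-trips through JSON; parseArg throws if 'data' or 'mimeType' is missing.
const parsed = ImageContextVariable.parseArg(request.arg);

// This mirrors what AbstractChatAgent now does with resolved image context variables:
// each one becomes an ImageMessage carrying Base64ImageContent.
const imageMessage = {
    actor: 'user' as const,
    type: 'image' as const,
    image: { base64data: parsed.data, mimeType: parsed.mimeType }
};
console.log(imageMessage.image.mimeType); // 'image/png'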