Skip to content

Commit 8c7eff1

Browse files
committed
Add initial support for images in the ai chat
Initial implementation of #15407
1 parent 19896b0 commit 8c7eff1

File tree

15 files changed

+428
-105
lines changed

15 files changed

+428
-105
lines changed

package-lock.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/ai-anthropic/src/node/anthropic-language-model.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ import {
2424
LanguageModelTextResponse,
2525
TokenUsageService,
2626
TokenUsageParams,
27-
UserRequest
27+
UserRequest,
28+
LLMImageData
2829
} from '@theia/ai-core';
2930
import { CancellationToken, isArray } from '@theia/core';
3031
import { Anthropic } from '@anthropic-ai/sdk';
@@ -48,6 +49,10 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont
4849
return [{ id: message.id, input: message.input, name: message.name, type: 'tool_use' }];
4950
} else if (LanguageModelMessage.isToolResultMessage(message)) {
5051
return [{ type: 'tool_result', tool_use_id: message.tool_use_id }];
52+
} else if (LanguageModelMessage.isImageMessage(message)) {
53+
if (LLMImageData.isBase64ImageData(message.image)) {
54+
return [{ type: 'image', source: { type: 'base64', media_type: message.image.mediaType, data: message.image.imageData } }];
55+
}
5156
}
5257
throw new Error(`Unknown message type:'${JSON.stringify(message)}'`);
5358
};
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// *****************************************************************************
2+
// Copyright (C) 2025 EclipseSource GmbH.
3+
//
4+
// This program and the accompanying materials are made available under the
5+
// terms of the Eclipse Public License v. 2.0 which is available at
6+
// http://www.eclipse.org/legal/epl-2.0.
7+
//
8+
// This Source Code may also be made available under the following Secondary
9+
// Licenses when the conditions for such availability set forth in the Eclipse
10+
// Public License v. 2.0 are satisfied: GNU General Public License, version 2
11+
// with the GNU Classpath Exception which is available at
12+
// https://www.gnu.org/software/classpath/license.html.
13+
//
14+
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0
15+
// *****************************************************************************
16+
import { nls } from '@theia/core';
17+
import * as React from '@theia/core/shared/react';
18+
19+
// Interface for pasted image data
20+
export interface PastedImage {
21+
id: string;
22+
data: string;
23+
name: string;
24+
type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
25+
}
26+
27+
// Image Preview Component
28+
interface ImagePreviewProps {
29+
images: PastedImage[];
30+
onRemove: (id: string) => void;
31+
}
32+
/**
 * Thumbnail strip for the images that were pasted into the chat input.
 *
 * Every thumbnail is rendered from the base64 payload and media type stored on the
 * {@link PastedImage}, and carries a close action that reports the image id via `onRemove`.
 * Nothing is rendered while the list is empty.
 */
export const ImagePreview: React.FC<ImagePreviewProps> = ({ images, onRemove }) => {
    if (!images.length) {
        return undefined;
    }

    // Builds the preview entry (thumbnail + remove action) for a single image.
    const renderItem = (image: PastedImage): React.ReactNode => {
        const source = `data:${image.type};base64,${image.data}`;
        const handleRemove = (event: React.MouseEvent): void => {
            // Keep the click from reaching handlers registered on enclosing elements.
            event.stopPropagation();
            onRemove(image.id);
        };
        return (
            <div key={image.id} className='theia-ChatInput-ImagePreview-Item'>
                <img src={source} alt={image.name} />
                <div className='theia-ChatInput-ImagePreview-Actions'>
                    <span
                        className='codicon codicon-close action'
                        title={nls.localizeByDefault('Remove')}
                        onClick={handleRemove} />
                </div>
            </div>
        );
    };

    return (
        <div className='theia-ChatInput-ImagePreview'>
            {images.map(image => renderItem(image))}
        </div>
    );
};

packages/ai-chat-ui/src/browser/chat-input-widget.tsx

Lines changed: 134 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,22 @@
1414
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0
1515
// *****************************************************************************
1616
import { ChangeSet, ChatAgent, ChatChangeEvent, ChatModel, ChatRequestModel, ChatService, ChatSuggestion } from '@theia/ai-chat';
17+
import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service';
18+
import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core';
19+
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
1720
import { Disposable, DisposableCollection, InMemoryResources, URI, nls } from '@theia/core';
1821
import { ContextMenuRenderer, LabelProvider, Message, OpenerService, ReactWidget } from '@theia/core/lib/browser';
1922
import { Deferred } from '@theia/core/lib/common/promise-util';
2023
import { inject, injectable, optional, postConstruct } from '@theia/core/shared/inversify';
2124
import * as React from '@theia/core/shared/react';
2225
import { IMouseEvent } from '@theia/monaco-editor-core';
23-
import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor';
2426
import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider';
25-
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
26-
import { AIVariableResolutionRequest } from '@theia/ai-core';
27-
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
28-
import { ContextVariablePicker } from './context-variable-picker';
27+
import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor';
28+
import { ImagePreview, PastedImage } from './ImagePreview';
2929
import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service';
30-
import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service';
3130
import { ChatInputAgentSuggestions } from './chat-input-agent-suggestions';
31+
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
32+
import { ContextVariablePicker } from './context-variable-picker';
3233

3334
type Query = (query: string) => Promise<void>;
3435
type Unpin = () => void;
@@ -37,6 +38,12 @@ type DeleteChangeSet = (requestModel: ChatRequestModel) => void;
3738
type DeleteChangeSetElement = (requestModel: ChatRequestModel, index: number) => void;
3839
type OpenContextElement = (request: AIVariableResolutionRequest) => unknown;
3940

41+
// Interface for the payload submitted to the AI
42+
// interface ChatPayload {
43+
// text: string;
44+
// images?: PastedImage[];
45+
// }
46+
4047
export const AIChatInputConfiguration = Symbol('AIChatInputConfiguration');
4148
export interface AIChatInputConfiguration {
4249
showContext?: boolean;
@@ -132,13 +139,54 @@ export class AIChatInputWidget extends ReactWidget {
132139
this.update();
133140
}
134141

142+
// State for pasted images
143+
private _pastedImages: PastedImage[] = [];
144+
public get pastedImages(): PastedImage[] {
145+
return this._pastedImages;
146+
}
147+
135148
@postConstruct()
136149
protected init(): void {
137150
this.id = AIChatInputWidget.ID;
138151
this.title.closable = false;
139152
this.update();
140153
}
141154

155+
// Process a file blob into an image
156+
/**
 * Reads an image file taken from the clipboard and appends it to the pasted-image state.
 *
 * The file is read as a data URL and only the base64 payload after the first comma is
 * stored; the media type is kept separately in `type`.
 *
 * Files whose MIME type is not one of the literals declared by `PastedImage['type']` are
 * ignored with a warning. Previously the type was force-cast, so any `image/*` clipboard
 * entry was stored under a media type the declared contract does not allow.
 *
 * @param blob the image file obtained from the clipboard item.
 */
private processImageFromClipboard(blob: File): void {
    const supportedTypes: ReadonlyArray<PastedImage['type']> = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
    const mediaType = supportedTypes.find(candidate => candidate === blob.type);
    if (!mediaType) {
        console.warn(`Ignoring pasted image with unsupported media type '${blob.type}'.`);
        return;
    }

    const reader = new FileReader();
    reader.onload = () => {
        const dataUrl = reader.result;
        // readAsDataURL yields a string; anything else (or an empty result) is unusable.
        if (typeof dataUrl !== 'string' || dataUrl.length === 0) { return; }

        // A data URL has the shape `data:<media type>;base64,<payload>`; keep the payload only.
        const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1);

        // A timestamp alone can repeat when two reads finish within the same millisecond,
        // which would produce duplicate React keys and remove both images at once.
        const timestamp = Date.now();
        const imageId = `img-${timestamp}-${Math.random().toString(36).slice(2, 10)}`;

        const newImage: PastedImage = {
            id: imageId,
            data: imageData,
            // Clipboard files are often unnamed; derive the extension from the real media type.
            name: blob.name || `pasted-image-${timestamp}.${mediaType.substring('image/'.length)}`,
            type: mediaType
        };

        this._pastedImages = [...this._pastedImages, newImage];
        this.update();
    };
    reader.onerror = () => {
        // Surface read failures instead of silently dropping the paste.
        console.error('Failed to read pasted image from clipboard.', reader.error);
    };

    reader.readAsDataURL(blob);
}
183+
184+
// Remove an image by id
185+
/**
 * Drops the pasted image with the given id from the widget state and re-renders.
 * Unknown ids leave the list content unchanged.
 *
 * @param id identifier of the pasted image to remove.
 */
public removeImage(id: string): void {
    const remaining: PastedImage[] = [];
    for (const image of this._pastedImages) {
        if (image.id !== id) {
            remaining.push(image);
        }
    }
    this._pastedImages = remaining;
    this.update();
}
189+
142190
protected override onActivateRequest(msg: Message): void {
143191
super.onActivateRequest(msg);
144192
this.editorReady.promise.then(() => {
@@ -185,6 +233,9 @@ export class AIChatInputWidget extends ReactWidget {
185233
decoratorService={this.changeSetDecoratorService}
186234
initialValue={this._initialValue}
187235
openerService={this.openerService}
236+
pastedImages={this._pastedImages}
237+
onRemoveImage={this.removeImage.bind(this)}
238+
onImagePasted={this.processImageFromClipboard.bind(this)}
188239
suggestions={this._chatModel.suggestions}
189240
/>
190241
);
@@ -268,7 +319,7 @@ export class AIChatInputWidget extends ReactWidget {
268319

269320
interface ChatInputProperties {
270321
onCancel: (requestModel: ChatRequestModel) => void;
271-
onQuery: (query: string) => void;
322+
onQuery: (query?: string, images?: LLMImageData[]) => void;
272323
onUnpin: () => void;
273324
onDragOver: (event: React.DragEvent) => void;
274325
onDrop: (event: React.DragEvent) => void;
@@ -294,6 +345,9 @@ interface ChatInputProperties {
294345
decoratorService: ChangeSetDecoratorService;
295346
initialValue?: string;
296347
openerService: OpenerService;
348+
pastedImages: PastedImage[];
349+
onRemoveImage: (id: string) => void;
350+
onImagePasted: (blob: File) => void;
297351
suggestions: readonly ChatSuggestion[]
298352
}
299353

@@ -321,6 +375,38 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
321375
// eslint-disable-next-line no-null/no-null
322376
const placeholderRef = React.useRef<HTMLDivElement | null>(null);
323377
const editorRef = React.useRef<SimpleMonacoEditor | undefined>(undefined);
378+
// eslint-disable-next-line no-null/no-null
379+
const containerRef = React.useRef<HTMLDivElement>(null);
380+
381+
// Handle paste events on the container
382+
/**
 * Paste handler: forwards the first image file found in the clipboard to
 * `props.onImagePasted` and suppresses the default paste for that event.
 * Events without an image file are left alone.
 */
const handlePaste = React.useCallback((e: ClipboardEvent) => {
    const clipboardItems = e.clipboardData?.items;
    if (!clipboardItems) { return; }

    for (const entry of clipboardItems) {
        if (!entry.type.startsWith('image/')) { continue; }

        const file = entry.getAsFile();
        if (!file) { continue; }

        // Only the first usable image is taken; the event is consumed so it is not handled further.
        e.preventDefault();
        e.stopPropagation();
        props.onImagePasted(file);
        return;
    }
}, [props.onImagePasted]);
397+
398+
// Set up paste handler on the container div
399+
// Registers the paste handler on the container in the capture phase and
// unregisters it again when the handler changes or the component unmounts.
React.useEffect(() => {
    const target = containerRef.current;
    if (!target) {
        return undefined;
    }

    target.addEventListener('paste', handlePaste, true);
    return () => {
        target.removeEventListener('paste', handlePaste, true);
    };
}, [handlePaste]);
324410

325411
React.useEffect(() => {
326412
const uri = props.resourceUriProvider();
@@ -451,7 +537,7 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
451537
responseListenerRef.current?.dispose();
452538
responseListenerRef.current = undefined;
453539
};
454-
}, [props.chatModel]);
540+
}, [props.chatModel, props.actionService, props.labelProvider]);
455541

456542
React.useEffect(() => {
457543
const disposable = props.actionService.onDidChange(() => {
@@ -460,7 +546,14 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
460546
setChangeSetUI(current => !current ? current : { ...current, actions: newActions });
461547
});
462548
return () => disposable.dispose();
463-
});
549+
}, [props.actionService, props.chatModel.changeSet]);
550+
551+
// // Extract image references from text
552+
// const extractImageReferences = (text: string): string[] => {
553+
// const regex = /!\[.*?\]\((img-\d+)\)/g;
554+
// const matches = [...text.matchAll(regex)];
555+
// return matches.map(match => match[1]);
556+
// };
464557

465558
React.useEffect(() => {
466559
const disposable = props.decoratorService.onDidChangeDecorations(() => {
@@ -486,13 +579,19 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
486579
}, [editorRef]);
487580

488581
/**
 * Submits the current input: forwards the text together with the pasted images to
 * `props.onQuery`, then resets the editor content and clears the pasted images.
 * Does nothing when there is neither non-blank text nor any pasted image.
 *
 * @param value the text currently entered in the chat input.
 */
const submit = React.useCallback(function submit(value: string): void {
    // Work on one snapshot so the images sent and the images cleared are the same set.
    const images = props.pastedImages;
    const hasText = !!value && value.trim().length > 0;
    if (!hasText && images.length === 0) {
        return;
    }

    setInProgress(true);
    props.onQuery(value, images.map(image => ({ imageData: image.data, mediaType: image.type })));
    setValue('');

    if (editorRef.current) {
        editorRef.current.document.textEditorModel.setValue('');
    }

    // Clear pasted images after submission
    images.forEach(image => props.onRemoveImage(image.id));
    // props.onRemoveImage is called above, so it must be listed to avoid a stale handler.
}, [props.context, props.onQuery, props.onRemoveImage, props.pastedImages, setValue]);
496595

497596
const onKeyDown = React.useCallback((event: React.KeyboardEvent) => {
498597
if (!props.isEnabled) {
@@ -592,21 +691,31 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
592691

593692
const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement, props.onOpenContextElement);
594693

595-
return <div className='theia-ChatInput' onDragOver={props.onDragOver} onDrop={props.onDrop} >
596-
{<ChatInputAgentSuggestions suggestions={props.suggestions} opener={props.openerService} />}
597-
{props.showChangeSet && changeSetUI?.elements &&
598-
<ChangeSetBox changeSet={changeSetUI} />
599-
}
600-
<div className='theia-ChatInput-Editor-Box'>
601-
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
602-
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
603-
</div>
604-
{props.context && props.context.length > 0 &&
605-
<ChatContext context={contextUI.context} />
694+
return (
695+
<div
696+
className='theia-ChatInput'
697+
onDragOver={props.onDragOver}
698+
onDrop={props.onDrop}
699+
ref={containerRef}
700+
>
701+
{<ChatInputAgentSuggestions suggestions={props.suggestions} opener={props.openerService} />}
702+
{props.showChangeSet && changeSetUI?.elements &&
703+
<ChangeSetBox changeSet={changeSetUI} />
606704
}
607-
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
705+
<div className='theia-ChatInput-Editor-Box'>
706+
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
707+
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
708+
</div>
709+
{props.pastedImages.length > 0 &&
710+
<ImagePreview images={props.pastedImages} onRemove={props.onRemoveImage} />
711+
}
712+
{props.context && props.context.length > 0 &&
713+
<ChatContext context={contextUI.context} />
714+
}
715+
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
716+
</div>
608717
</div>
609-
</div>;
718+
);
610719
};
611720

612721
const noPropagation = (handler: () => void) => (e: React.MouseEvent) => {

0 commit comments

Comments
 (0)