Skip to content

Commit 527dc5b

Browse files
committed
Add initial support for images in the ai chat
Initial implementation of #15407
1 parent 8d34507 commit 527dc5b

File tree

14 files changed

+403
-93
lines changed

14 files changed

+403
-93
lines changed

packages/ai-anthropic/src/node/anthropic-language-model.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ import {
2424
LanguageModelTextResponse,
2525
TokenUsageService,
2626
TokenUsageParams,
27-
UserRequest
27+
UserRequest,
28+
LLMImageData
2829
} from '@theia/ai-core';
2930
import { CancellationToken, isArray } from '@theia/core';
3031
import { Anthropic } from '@anthropic-ai/sdk';
@@ -48,6 +49,10 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont
4849
return [{ id: message.id, input: message.input, name: message.name, type: 'tool_use' }];
4950
} else if (LanguageModelMessage.isToolResultMessage(message)) {
5051
return [{ type: 'tool_result', tool_use_id: message.tool_use_id }];
52+
} else if (LanguageModelMessage.isImageMessage(message)) {
53+
if (LLMImageData.isBase64ImageData(message.image)) {
54+
return [{ type: 'image', source: { type: 'base64', media_type: message.image.mediaType, data: message.image.imageData } }];
55+
}
5156
}
5257
throw new Error(`Unknown message type:'${JSON.stringify(message)}'`);
5358
};
packages/ai-chat-ui/src/browser/ImagePreview.tsx

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// *****************************************************************************
2+
// Copyright (C) 2025 EclipseSource GmbH.
3+
//
4+
// This program and the accompanying materials are made available under the
5+
// terms of the Eclipse Public License v. 2.0 which is available at
6+
// http://www.eclipse.org/legal/epl-2.0.
7+
//
8+
// This Source Code may also be made available under the following Secondary
9+
// Licenses when the conditions for such availability set forth in the Eclipse
10+
// Public License v. 2.0 are satisfied: GNU General Public License, version 2
11+
// with the GNU Classpath Exception which is available at
12+
// https://www.gnu.org/software/classpath/license.html.
13+
//
14+
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0
15+
// *****************************************************************************
16+
import { nls } from '@theia/core';
17+
import * as React from '@theia/core/shared/react';
18+
19+
// Interface for pasted image data
20+
export interface PastedImage {
21+
/** Identifier of the image; used as the React list key in the preview and passed to `onRemove`. */
id: string;
22+
/** Image payload; the preview interpolates it after `base64,` when building the `data:` URL. */
data: string;
23+
/** Display name; the preview uses it as the `alt` text of the thumbnail. */
name: string;
24+
/** MIME type; the preview interpolates it after `data:` when building the `data:` URL. */
type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
25+
}
26+
27+
// Image Preview Component
28+
interface ImagePreviewProps {
29+
/** Images to show; one preview item is rendered per entry. */
images: PastedImage[];
30+
/** Called with the `id` of an image when its close icon is clicked. */
onRemove: (id: string) => void;
31+
}
32+
/**
 * Renders one thumbnail per pasted image, each with a close control that removes it.
 *
 * Renders nothing when `images` is empty.
 *
 * @param images the images to preview; each is shown through a base64 `data:` URL
 * @param onRemove called with the id of the image whose close control was activated
 */
export const ImagePreview: React.FC<ImagePreviewProps> = ({ images, onRemove }) => {
    if (images.length === 0) {
        // `null` is the "render nothing" value accepted by every React version and typing.
        // eslint-disable-next-line no-null/no-null
        return null;
    }

    return (
        <div className='theia-ChatInput-ImagePreview'>
            {images.map(img => {
                // Shared by mouse and keyboard activation; the event is stopped so that
                // removing an image does not also trigger handlers on enclosing elements.
                const remove = (e: React.SyntheticEvent): void => {
                    e.stopPropagation();
                    onRemove(img.id);
                };
                return (
                    <div key={img.id} className='theia-ChatInput-ImagePreview-Item'>
                        <img src={`data:${img.type};base64,${img.data}`} alt={img.name} />
                        <div className='theia-ChatInput-ImagePreview-Actions'>
                            <span
                                className='codicon codicon-close action'
                                title={nls.localizeByDefault('Remove')}
                                // A bare <span> is neither focusable nor announced as a control;
                                // expose it as a button so it can be reached and used by keyboard.
                                role='button'
                                tabIndex={0}
                                aria-label={nls.localizeByDefault('Remove')}
                                onClick={remove}
                                onKeyDown={e => {
                                    if (e.key === 'Enter' || e.key === ' ') {
                                        // Keep Space from scrolling the surrounding view.
                                        e.preventDefault();
                                        remove(e);
                                    }
                                }} />
                        </div>
                    </div>
                );
            })}
        </div>
    );
};

packages/ai-chat-ui/src/browser/chat-input-widget.tsx

Lines changed: 126 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,24 @@ import { IMouseEvent } from '@theia/monaco-editor-core';
2323
import { MonacoEditor } from '@theia/monaco/lib/browser/monaco-editor';
2424
import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider';
2525
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
26-
import { AIVariableResolutionRequest } from '@theia/ai-core';
26+
import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core';
2727
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
2828
import { ContextVariablePicker } from './context-variable-picker';
2929
import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service';
30+
import { ImagePreview, PastedImage } from './ImagePreview';
3031

3132
type Query = (query: string) => Promise<void>;
3233
type Unpin = () => void;
3334
type Cancel = (requestModel: ChatRequestModel) => void;
3435
type DeleteChangeSet = (requestModel: ChatRequestModel) => void;
3536
type DeleteChangeSetElement = (requestModel: ChatRequestModel, index: number) => void;
3637

38+
// Interface for the payload submitted to the AI
39+
// interface ChatPayload {
40+
// text: string;
41+
// images?: PastedImage[];
42+
// }
43+
3744
export const AIChatInputConfiguration = Symbol('AIChatInputConfiguration');
3845
export interface AIChatInputConfiguration {
3946
showContext?: boolean;
@@ -114,13 +121,54 @@ export class AIChatInputWidget extends ReactWidget {
114121
this.update();
115122
}
116123

124+
// State for pasted images
125+
private _pastedImages: PastedImage[] = [];
126+
/** Read-only accessor for the pasted images; returns the internal array itself, not a copy. */
public get pastedImages(): PastedImage[] {
127+
return this._pastedImages;
128+
}
129+
117130
@postConstruct()
118131
protected init(): void {
119132
this.id = AIChatInputWidget.ID;
120133
this.title.closable = false;
121134
this.update();
122135
}
123136

137+
// Process a file blob into an image
138+
/**
 * Reads a pasted image file asynchronously and appends it to the pasted-image state.
 *
 * Files whose MIME type is not one of the types allowed by `PastedImage['type']` are
 * ignored with a warning instead of being stored under an incorrect type.
 *
 * @param blob the image file taken from the clipboard
 */
private processImageFromClipboard(blob: File): void {
    const supportedTypes: ReadonlyArray<PastedImage['type']> = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
    // Narrow the free-form MIME string to the supported union by lookup rather than by cast.
    const mediaType = supportedTypes.find(type => type === blob.type);
    if (!mediaType) {
        console.warn(`Ignoring pasted image with unsupported type '${blob.type}'`);
        return;
    }

    const reader = new FileReader();
    reader.onerror = () => {
        console.error('Failed to read pasted image', reader.error);
    };
    reader.onload = () => {
        // readAsDataURL yields a string result; anything else means there is nothing to store.
        const dataUrl = reader.result;
        if (typeof dataUrl !== 'string' || dataUrl.length === 0) { return; }

        const timestamp = Date.now();

        // Two images can finish loading within the same millisecond. Disambiguate so that
        // ids stay unique among the images currently held (they are used as React keys and
        // as the handle for removal).
        let imageId = `img-${timestamp}`;
        for (let suffix = 1; this._pastedImages.some(img => img.id === imageId); suffix++) {
            imageId = `img-${timestamp}-${suffix}`;
        }

        // Extract the base64 data by removing the data URL prefix
        // Format is like: data:image/png;base64,BASE64DATA
        const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1);

        const newImage: PastedImage = {
            id: imageId,
            data: imageData, // Store just the base64 data without the prefix
            // Clipboard files may have no name; derive one whose extension matches the type.
            name: blob.name || `pasted-image-${timestamp}.${mediaType.substring('image/'.length)}`,
            type: mediaType
        };

        // Replace the array instead of mutating it, then re-render.
        this._pastedImages = [...this._pastedImages, newImage];
        this.update();
    };

    reader.readAsDataURL(blob);
}
165+
166+
// Remove an image by id
167+
/**
 * Removes every pasted image whose id equals `id` and re-renders the widget.
 *
 * @param id id of the image to remove; if no image has this id the list content is unchanged
 */
public removeImage(id: string): void {
167+
// Build a new array rather than mutating the existing one.
this._pastedImages = this._pastedImages.filter(img => img.id !== id);
168+
this.update();
169+
}
171+
124172
protected override onActivateRequest(msg: Message): void {
125173
super.onActivateRequest(msg);
126174
this.editorReady.promise.then(() => {
@@ -157,6 +205,9 @@ export class AIChatInputWidget extends ReactWidget {
157205
showPinnedAgent={this.configuration?.showPinnedAgent}
158206
labelProvider={this.labelProvider}
159207
actionService={this.changeSetActionService}
208+
pastedImages={this._pastedImages}
209+
onRemoveImage={this.removeImage.bind(this)}
210+
onImagePasted={this.processImageFromClipboard.bind(this)}
160211
/>
161212
);
162213
}
@@ -229,7 +280,7 @@ export class AIChatInputWidget extends ReactWidget {
229280

230281
interface ChatInputProperties {
231282
onCancel: (requestModel: ChatRequestModel) => void;
232-
onQuery: (query: string) => void;
283+
onQuery: (query?: string, images?: LLMImageData[]) => void;
233284
onUnpin: () => void;
234285
onDragOver: (event: React.DragEvent) => void;
235286
onDrop: (event: React.DragEvent) => void;
@@ -249,6 +300,9 @@ interface ChatInputProperties {
249300
showPinnedAgent?: boolean;
250301
labelProvider: LabelProvider;
251302
actionService: ChangeSetActionService;
303+
pastedImages: PastedImage[];
304+
onRemoveImage: (id: string) => void;
305+
onImagePasted: (blob: File) => void;
252306
}
253307

254308
const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInputProperties) => {
@@ -274,6 +328,38 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
274328
// eslint-disable-next-line no-null/no-null
275329
const placeholderRef = React.useRef<HTMLDivElement | null>(null);
276330
const editorRef = React.useRef<MonacoEditor | undefined>(undefined);
331+
// eslint-disable-next-line no-null/no-null
332+
const containerRef = React.useRef<HTMLDivElement>(null);
333+
334+
// Handle paste events on the container
335+
const handlePaste = React.useCallback((e: ClipboardEvent) => {
336+
// Nothing to inspect when the event carries no clipboard items.
if (!e.clipboardData?.items) { return; }
337+
338+
// Look for the first clipboard item that is an image and can be obtained as a file.
for (const item of e.clipboardData.items) {
339+
if (item.type.startsWith('image/')) {
340+
const blob = item.getAsFile();
341+
if (blob) {
342+
// The paste is consumed here: default handling and further propagation are suppressed.
e.preventDefault();
343+
e.stopPropagation();
344+
props.onImagePasted(blob);
345+
// Only one image is taken per paste event; remaining items are not examined.
break;
346+
}
347+
}
348+
}
349+
}, [props.onImagePasted]);
350+
351+
// Set up paste handler on the container div
352+
React.useEffect(() => {
353+
const container = containerRef.current;
354+
if (container) {
355+
// Third argument `true` registers the listener for the capture phase.
container.addEventListener('paste', handlePaste, true);
356+
357+
// Cleanup: remove the same listener (same capture flag) when handlePaste changes or on unmount.
return () => {
358+
container.removeEventListener('paste', handlePaste, true);
359+
};
360+
}
361+
// No container element yet: nothing was registered, so there is nothing to clean up.
return undefined;
362+
}, [handlePaste]);
277363

278364
React.useEffect(() => {
279365
const uri = new URI(`ai-chat:/input.${CHAT_VIEW_LANGUAGE_EXTENSION}`);
@@ -397,7 +483,7 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
397483
responseListenerRef.current?.dispose();
398484
responseListenerRef.current = undefined;
399485
};
400-
}, [props.chatModel]);
486+
}, [props.chatModel, props.actionService, props.labelProvider]);
401487

402488
React.useEffect(() => {
403489
const disposable = props.actionService.onDidChange(() => {
@@ -406,18 +492,28 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
406492
setChangeSetUI(current => !current ? current : { ...current, actions: newActions });
407493
});
408494
return () => disposable.dispose();
409-
});
495+
}, [props.actionService, props.chatModel.changeSet]);
496+
497+
// // Extract image references from text
498+
// const extractImageReferences = (text: string): string[] => {
499+
// const regex = /!\[.*?\]\((img-\d+)\)/g;
500+
// const matches = [...text.matchAll(regex)];
501+
// return matches.map(match => match[1]);
502+
// };
410503

411504
const submit = React.useCallback(function submit(value: string): void {
412-
if (!value || value.trim().length === 0) {
505+
if ((!value || value.trim().length === 0) && props.pastedImages.length === 0) {
413506
return;
414507
}
508+
415509
setInProgress(true);
416-
props.onQuery(value);
510+
props.onQuery(value, props.pastedImages.map(p => ({ imageData: p.data, mediaType: p.type })));
511+
417512
if (editorRef.current) {
418513
editorRef.current.document.textEditorModel.setValue('');
419-
}
420-
}, [props.context, props.onQuery, editorRef]);
514+
}// Clear pasted images after submission
515+
props.pastedImages.forEach(image => props.onRemoveImage(image.id));
516+
}, [props.onQuery, props.pastedImages]);
421517

422518
const onKeyDown = React.useCallback((event: React.KeyboardEvent) => {
423519
if (!props.isEnabled) {
@@ -517,20 +613,30 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
517613

518614
const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement);
519615

520-
return <div className='theia-ChatInput' onDragOver={props.onDragOver} onDrop={props.onDrop} >
521-
{changeSetUI?.elements &&
522-
<ChangeSetBox changeSet={changeSetUI} />
523-
}
524-
<div className='theia-ChatInput-Editor-Box'>
525-
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
526-
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
527-
</div>
528-
{props.context && props.context.length > 0 &&
529-
<ChatContext context={contextUI.context} />
616+
return (
617+
<div
618+
className='theia-ChatInput'
619+
onDragOver={props.onDragOver}
620+
onDrop={props.onDrop}
621+
ref={containerRef}
622+
>
623+
{changeSetUI?.elements &&
624+
<ChangeSetBox changeSet={changeSetUI} />
530625
}
531-
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
626+
<div className='theia-ChatInput-Editor-Box'>
627+
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
628+
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
629+
</div>
630+
{props.pastedImages.length > 0 &&
631+
<ImagePreview images={props.pastedImages} onRemove={props.onRemoveImage} />
632+
}
633+
{props.context && props.context.length > 0 &&
634+
<ChatContext context={contextUI.context} />
635+
}
636+
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
637+
</div>
532638
</div>
533-
</div>;
639+
);
534640
};
535641

536642
const noPropagation = (handler: () => void) => (e: React.MouseEvent) => {

packages/ai-chat-ui/src/browser/chat-tree-view/chat-view-tree-widget.tsx

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ import { nls } from '@theia/core/lib/common/nls';
5151
import { ChatNodeToolbarActionContribution } from '../chat-node-toolbar-action-contribution';
5252
import { ChatResponsePartRenderer } from '../chat-response-part-renderer';
5353
import { useMarkdownRendering } from '../chat-response-renderer/markdown-part-renderer';
54-
import { AIVariableService } from '@theia/ai-core';
54+
import { AIVariableService, LLMImageData } from '@theia/ai-core';
5555
import { ProgressMessage } from '../chat-progress-message';
5656

5757
// TODO Instead of directly operating on the ChatRequestModel we could use an intermediate view model
@@ -410,6 +410,8 @@ const ChatRequestRender = (
410410
openerService: OpenerService
411411
}) => {
412412
const parts = node.request.message.parts;
413+
const images = node.request.images || [];
414+
413415
return (
414416
<div className="theia-RequestNode">
415417
<p>
@@ -442,6 +444,20 @@ const ChatRequestRender = (
442444
}
443445
})}
444446
</p>
447+
{images.length > 0 && (
448+
<div className="theia-RequestNode-Images">
449+
{images.map((img, index) => (
450+
<div key={`img-${index}`} className="theia-RequestNode-ImageContainer">
451+
{LLMImageData.isBase64ImageData(img) ?
452+
<img
453+
src={`data:${img.mediaType};base64,${img.imageData}`}
454+
alt={`Image ${index + 1}`}
455+
className="theia-RequestNode-Image"
456+
/> : undefined}
457+
</div>
458+
))}
459+
</div>
460+
)}
445461
</div>
446462
);
447463
};

packages/ai-chat-ui/src/browser/chat-view-contribution.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ export class ChatViewMenuContribution implements MenuContribution, CommandContri
115115

116116
protected getCopyText(arg: RequestNode | ResponseNode): string {
117117
if (isRequestNode(arg)) {
118-
return arg.request.request.text;
118+
return arg.request.request.text ?? '';
119119
} else if (isResponseNode(arg)) {
120120
return arg.response.response.asDisplayString();
121121
}

packages/ai-chat-ui/src/browser/chat-view-widget.tsx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import { inject, injectable, postConstruct } from '@theia/core/shared/inversify'
2121
import { AIChatInputWidget } from './chat-input-widget';
2222
import { ChatViewTreeWidget } from './chat-tree-view/chat-view-tree-widget';
2323
import { AIActivationService } from '@theia/ai-core/lib/browser/ai-activation-service';
24-
import { AIVariableResolutionRequest } from '@theia/ai-core';
24+
import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core';
2525

2626
export namespace ChatViewWidget {
2727
export interface State {
@@ -164,10 +164,10 @@ export class ChatViewWidget extends BaseWidget implements ExtractableWidget, Sta
164164
return this.onStateChangedEmitter.event;
165165
}
166166

167-
protected async onQuery(query: string): Promise<void> {
168-
if (query.length === 0) { return; }
167+
protected async onQuery(query?: string, imageData?: LLMImageData[]): Promise<void> {
168+
if ((!query || query.length === 0) && (!imageData || imageData.length === 0)) { return; }
169169

170-
const chatRequest: ChatRequest = { text: query };
170+
const chatRequest: ChatRequest = { text: query, images: imageData };
171171
const requestProgress = await this.chatService.sendRequest(this.chatSession.id, chatRequest);
172172
requestProgress?.responseCompleted.then(responseModel => {
173173
if (responseModel.isError) {

0 commit comments

Comments
 (0)