@@ -62,6 +62,7 @@ func OpenAIRegister(cmd []Client, opts []client.ClientOpt, flags *Flags) ([]Clie
62
62
{Name : "speak" , Description : "Create speech from a prompt" , Syntax : "(<voice>) <prompt>" , MinArgs : 3 , MaxArgs : 4 , Fn : openaiSpeak (openai , flags )},
63
63
{Name : "transcribe" , Description : "Transcribe audio to text" , Syntax : "<filename>" , MinArgs : 3 , MaxArgs : 3 , Fn : openaiTranscribe (openai , flags )},
64
64
{Name : "translate" , Description : "Translate audio to English" , Syntax : "<filename>" , MinArgs : 3 , MaxArgs : 3 , Fn : openaiTranslate (openai , flags )},
65
+ {Name : "caption" , Description : "Provide a caption for an image" , Syntax : "<filename>" , MinArgs : 3 , MaxArgs : 3 , Fn : openaiCaption (openai , flags )},
65
66
},
66
67
})
67
68
@@ -283,6 +284,32 @@ func openaiImages(client *openai.Client, flags *Flags) CommandFn {
283
284
}
284
285
}
285
286
287
+ func openaiCaption (client * openai.Client , flags * Flags ) CommandFn {
288
+ return func () error {
289
+ url , err := url .Parse (flags .Arg (2 ))
290
+ if err != nil {
291
+ return err
292
+ }
293
+ message := openai .NewMessage ("user" , "Provide a short caption for this image" )
294
+ if url .Scheme == "" || url .Scheme == "file" {
295
+ // TODO: Image needs to be uploaded first
296
+ message .AppendImageFile (url .Path )
297
+ } else {
298
+ message .AppendImageUrl (url .String ())
299
+ }
300
+ if response , err := client .Chat ([]* openai.Message {message }, openai .OptModel ("gpt-4-vision-preview" )); err != nil {
301
+ return err
302
+ } else if len (response .Choices ) == 0 {
303
+ return errors .New ("no response from OpenAI" )
304
+ } else if err := flags .Write (response .Choices [0 ].Message ); err != nil {
305
+ return err
306
+ }
307
+
308
+ // Return success
309
+ return nil
310
+ }
311
+ }
312
+
286
313
/////////////////////////////////////////////////////////////////////
287
314
// PRIVATE METHODS
288
315
0 commit comments