@@ -2,80 +2,70 @@ module RubyLLM
2
2
module Providers
3
3
module Mistral
4
4
# Media handling for Mistral models
5
- #
6
- # NOTE: There's currently an issue with Pixtral vision capabilities in the test suite.
7
- # The content array contains nil values when an image is attached, which causes the API to return errors.
8
- # This might be due to how images are being attached or formatted in the Content class.
9
- # The test in chat_content_spec.rb for 'pixtral-12b-latest can understand images' currently fails.
10
- # The debug output shows: content: [{type: "text", text: "..."}, nil] where the second element should be the image.
11
- # This likely requires fixes in the core library's Content class or how it interacts with provider-specific formatting.
5
+
12
6
module Media
13
7
module_function
14
8
15
- def supports_image? ( model_id )
16
- # Check if the model supports vision according to the model capabilities
17
- capabilities . supports_vision? ( model_id )
18
- end
19
-
20
- def supports_audio? ( model_id )
21
- # Check if the model supports audio according to the model capabilities
22
- capabilities . supports_audio? ( model_id )
23
- end
24
-
25
- # Moved from chat.rb
26
9
# Convert message content into the structure sent to the Mistral chat API.
#
# Anything that is not a Content object is returned untouched. A Content
# object becomes an array of parts: an optional leading text part followed
# by one part per image attachment. Text-file attachments are folded into
# the message text because Mistral does not accept them as attachments.
#
# @param content [Content, Object] message content to format
# @return [Array<Hash>, Object] formatted parts, or +content+ itself when
#   it is not a Content
# @raise [UnsupportedAttachmentError] for PDF, audio, or any other
#   unrecognised attachment type
def format_content(content)
  return content unless content.is_a?(Content)

  parts = []
  parts << { type: "text", text: content.text } if content.text

  content.attachments.each do |attachment|
    case attachment.type
    when :image
      parts << format_image(attachment)
    when :text
      # Mistral doesn't support text files as attachments, so fold them into the text
      file_text = Utils.format_text_file_for_llm(attachment)
      lead = parts.first
      if lead && lead[:type] == "text"
        # Assign a freshly built string so content.text itself is never mutated
        lead[:text] = "#{lead[:text]}\n\n#{file_text}"
      else
        parts << { type: "text", text: file_text }
      end
    else
      # PDFs (only available through the OCR API), audio, and any unknown
      # attachment type are all rejected the same way
      raise UnsupportedAttachmentError, attachment.type
    end
  end

  parts
end
44
41
45
42
# Format image according to Mistral API requirements
#
# A source that starts with http:// or https:// is passed through as a URL;
# any other image is inlined as a base64 data URI built from the
# attachment's MIME type and encoded payload.
#
# @param attachment [Attachment] Image attachment
# @return [Hash] Formatted image data for Mistral API
def format_image(attachment)
  source = attachment.respond_to?(:source) ? attachment.source.to_s : ""

  # \A anchors at the start of the whole string; ^ would also match at the
  # start of any later line and misclassify a multi-line source as a URL.
  url = if source.match?(%r{\Ahttps?://})
          source
        else
          "data:#{attachment.mime_type};base64,#{attachment.encoded}"
        end

  { type: "image_url", image_url: url }
end
56
+
57
# Whether the given model accepts image input.
#
# Delegates to the provider capabilities object's vision check.
#
# @param model_id [String] identifier of the model to look up
# @return [Boolean] result of +capabilities.supports_vision?+
def supports_image?(model_id)
  vision_source = capabilities
  vision_source.supports_vision?(model_id)
end
61
+
62
# Whether the given model accepts audio input.
#
# Delegates to the provider capabilities object's audio check.
#
# @param model_id [String] identifier of the model to look up
# @return [Boolean] result of +capabilities.supports_audio?+
def supports_audio?(model_id)
  audio_source = capabilities
  audio_source.supports_audio?(model_id)
end
66
+
67
# Capabilities lookup used by the support predicates in this module.
#
# @return [Module] the RubyLLM::Providers::Mistral::Capabilities constant
def capabilities
  RubyLLM::Providers::Mistral::Capabilities
end
80
70
end
81
71
end
0 commit comments