Skip to content

Commit a4fa09e

Browse files
committed
resolve image test failures
1 parent 4a682ab commit a4fa09e

File tree

5 files changed

+193
-24
lines changed

5 files changed

+193
-24
lines changed

lib/ruby_llm/models.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4369,7 +4369,7 @@
43694369
"max_tokens": 28800,
43704370
"type": "chat",
43714371
"family": "small",
4372-
"supports_vision": false,
4372+
"supports_vision": true,
43734373
"supports_functions": true,
43744374
"supports_json_mode": true,
43754375
"input_price_per_million": 0.2,
@@ -4388,7 +4388,7 @@
43884388
"max_tokens": 28800,
43894389
"type": "chat",
43904390
"family": "small",
4391-
"supports_vision": false,
4391+
"supports_vision": true,
43924392
"supports_functions": true,
43934393
"supports_json_mode": true,
43944394
"input_price_per_million": 0.2,
@@ -4407,7 +4407,7 @@
44074407
"max_tokens": 28800,
44084408
"type": "chat",
44094409
"family": "small",
4410-
"supports_vision": false,
4410+
"supports_vision": true,
44114411
"supports_functions": true,
44124412
"supports_json_mode": true,
44134413
"input_price_per_million": 0.2,

lib/ruby_llm/providers/mistral/capabilities.rb

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,14 @@ def output_price_for(model_id)
5151
# @param model_id [String] the model identifier
5252
# @return [Boolean] true if the model supports vision
5353
def supports_vision?(model_id)
54-
# Determine vision support based on model ID pattern
55-
model_id.match?(/pixtral/)
54+
# Match model IDs that contain one of the known vision-capable model names
55+
vision_models = [
56+
'pixtral-12b-latest',
57+
'pixtral-large-latest',
58+
'mistral-medium-latest',
59+
'mistral-small-latest'
60+
]
61+
vision_models.any? { |id| model_id.include?(id) }
5662
end
5763

5864
# Determines if the model supports function calling
@@ -96,10 +102,11 @@ def model_type(model_id)
96102
end
97103

98104
# Determines if the model supports structured output
105+
# Per the Mistral docs (https://docs.mistral.ai/capabilities/structured-output/structured_output_overview/), all Mistral models support JSON mode.
99106
# @param model_id [String] the model identifier
100107
# @return [Boolean] true if the model supports structured output
101108
def supports_structured_output?(model_id)
102-
!model_id.match?(/embed|moderation/)
109+
true
103110
end
104111

105112
# Determines the model family for pricing and capability lookup

lib/ruby_llm/providers/mistral/chat.rb

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -61,25 +61,27 @@ def render_message(message)
6161
result = {}
6262
result[:role] = message.role
6363

64-
# Handle content formatting (text or multimodal array)
65-
if message.content.is_a?(Array)
66-
# Multimodal content: format each part
67-
formatted_content = message.content.compact.map do |item|
68-
if item.is_a?(Hash) && item[:type] == "image"
69-
# Use the media helper to format image data correctly for Mistral
70-
Mistral::Media.format_image_content(item)
71-
elsif item.is_a?(Hash) && item[:type] == "text"
72-
# Pass through text hashes
73-
item
74-
elsif item.is_a?(String)
75-
# Wrap plain strings in the text hash format
76-
{ type: "text", text: item }
77-
else
78-
# Skip unknown item types in content array
79-
RubyLLM.logger.warn "[MISTRAL WARN] Unknown item type in message content array: #{item.inspect}"
80-
nil
64+
# If the message content is a RubyLLM::Content with attachments, convert to multimodal array
65+
if message.content.is_a?(RubyLLM::Content)
66+
content = message.content
67+
if content.attachments.any?
68+
multimodal = []
69+
multimodal << { type: "text", text: content.text } if content.text
70+
content.attachments.each do |attachment|
71+
case attachment
72+
when RubyLLM::Attachments::Image
73+
multimodal << { type: "image", source: attachment.url? ? { url: attachment.source } : { type: 'base64', media_type: attachment.mime_type, data: attachment.encoded } }
74+
# Add more attachment types if Mistral supports them
75+
end
8176
end
82-
end.compact # Remove any nils from formatting errors
77+
# Always format multimodal for Mistral
78+
result[:content] = Mistral::Media.format_content(multimodal)
79+
else
80+
result[:content] = content.text
81+
end
82+
elsif message.content.is_a?(Array)
83+
# Multimodal content: format each part
84+
formatted_content = Mistral::Media.format_content(message.content.compact)
8385
result[:content] = formatted_content unless formatted_content.empty?
8486
else
8587
# Simple text content

0 commit comments

Comments
 (0)