Skip to content

Commit a396bd6

Browse files
committed
Apply the attachments refactoring. Rebase on origin/main
1 parent 2076f27 commit a396bd6

7 files changed

+602
-104
lines changed

lib/ruby_llm/providers/mistral/chat.rb

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -59,29 +59,15 @@ def render_payload(messages, tools:, temperature:, model:, stream: nil,
5959

6060
def render_message(message)
6161
result = {}
62-
result[:role] = message.role
62+
result[:role] = message.role.to_s # Ensure role is a string for API compliance
6363

6464
# If the message content is a RubyLLM::Content with attachments, convert to multimodal array
6565
if message.content.is_a?(RubyLLM::Content)
6666
content = message.content
67-
if content.attachments.any?
68-
multimodal = []
69-
multimodal << { type: "text", text: content.text } if content.text
70-
content.attachments.each do |attachment|
71-
case attachment
72-
when RubyLLM::Attachments::Image
73-
multimodal << { type: "image", source: attachment.url? ? { url: attachment.source } : { type: 'base64', media_type: attachment.mime_type, data: attachment.encoded } }
74-
# Add more attachment types if Mistral supports them
75-
end
76-
end
77-
# Always format multimodal for Mistral
78-
result[:content] = Mistral::Media.format_content(multimodal)
79-
else
80-
result[:content] = content.text
81-
end
67+
result[:content] = Mistral::Media.format_content(content)
8268
elsif message.content.is_a?(Array)
8369
# Multimodal content: format each part
84-
formatted_content = Mistral::Media.format_content(message.content.compact)
70+
formatted_content = message.content.compact
8571
result[:content] = formatted_content unless formatted_content.empty?
8672
else
8773
# Simple text content
@@ -96,6 +82,7 @@ def render_message(message)
9682
tool_calls = message.tool_calls.is_a?(Hash) ? message.tool_calls.values : message.tool_calls
9783
result[:tool_calls] = tool_calls.map { |tc| render_tool_call(tc) }
9884
end
85+
9986
result.compact
10087
end
10188

lib/ruby_llm/providers/mistral/media.rb

Lines changed: 49 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,80 +2,70 @@ module RubyLLM
22
module Providers
33
module Mistral
44
# Media handling for Mistral models
5-
#
6-
# NOTE: There's currently an issue with Pixtral vision capabilities in the test suite.
7-
# The content array contains nil values when an image is attached, which causes the API to return errors.
8-
# This might be due to how images are being attached or formatted in the Content class.
9-
# The test in chat_content_spec.rb for 'pixtral-12b-latest can understand images' currently fails.
10-
# The debug output shows: content: [{type: "text", text: "..."}, nil] where the second element should be the image.
11-
# This likely requires fixes in the core library's Content class or how it interacts with provider-specific formatting.
5+
126
module Media
137
module_function
148

15-
def supports_image?(model_id)
16-
# Check if the model supports vision according to the model capabilities
17-
capabilities.supports_vision?(model_id)
18-
end
19-
20-
def supports_audio?(model_id)
21-
# Check if the model supports audio according to the model capabilities
22-
capabilities.supports_audio?(model_id)
23-
end
24-
25-
# Moved from chat.rb
269
def format_content(content)
27-
return content unless content.is_a?(Array)
10+
return content unless content.is_a?(Content)
2811

29-
RubyLLM.logger.debug "Formatting multimodal content: #{content.inspect}"
12+
parts = []
13+
parts << { type: "text", text: content.text } if content.text
3014

31-
# Filter out nil values
32-
filtered_content = content.compact
33-
34-
RubyLLM.logger.debug "Filtered content: #{filtered_content.inspect}"
35-
36-
filtered_content.map do |item|
37-
if item.is_a?(Hash) && item[:type] == "image"
38-
format_image_content(item)
15+
content.attachments.each do |attachment|
16+
case attachment.type
17+
when :image
18+
parts << format_image(attachment)
19+
when :pdf
20+
# Mistral doesn't currently support PDFs other than
21+
# through the OCR API
22+
raise UnsupportedAttachmentError, attachment.type
23+
when :audio
24+
# Mistral doesn't currently support audio
25+
raise UnsupportedAttachmentError, attachment.type
26+
when :text
27+
# Mistral doesn't support text files as attachments, so we'll append to the text
28+
if parts.first && parts.first[:type] == "text"
29+
parts.first[:text] += "\n\n" + Utils.format_text_file_for_llm(attachment)
30+
else
31+
parts << { type: "text", text: Utils.format_text_file_for_llm(attachment) }
32+
end
3933
else
40-
item # Pass through text or other types
34+
raise UnsupportedAttachmentError, attachment.type
4135
end
4236
end
37+
38+
# Filter out nil values and return the formatted content array
39+
parts.compact
4340
end
4441

4542
# Format image according to Mistral API requirements
46-
# @param image [Hash] Image data hash from Content class
43+
# @param image [Attachment] Image attachment
4744
# @return [Hash] Formatted image data for Mistral API
48-
def format_image_content(image)
49-
RubyLLM.logger.debug "Formatting image content: #{image.inspect}"
50-
51-
if image[:source].is_a?(Hash)
52-
if image[:source][:url]
53-
# Direct URL from source hash
54-
{
55-
type: "image_url",
56-
image_url: image[:source][:url]
57-
}
58-
elsif image[:source][:type] == 'base64'
59-
# Base64 data from source hash
60-
data_uri = "data:#{image[:source][:media_type]};base64,#{image[:source][:data]}"
61-
{
62-
type: "image_url",
63-
image_url: data_uri
64-
}
65-
else
66-
RubyLLM.logger.warn "Invalid image source format: #{image[:source]}"
67-
nil
68-
end
69-
elsif image[:source].is_a?(String)
70-
# Direct URL string
71-
{
72-
type: "image_url",
73-
image_url: image[:source]
74-
}
45+
def format_image(attachment)
46+
url = if attachment.respond_to?(:source) && attachment.source.to_s.match?(/^https?:\/\//)
47+
attachment.source.to_s
7548
else
76-
RubyLLM.logger.warn "Invalid image format: #{image}"
77-
nil
49+
"data:#{attachment.mime_type};base64,#{attachment.encoded}"
7850
end
51+
{
52+
type: "image_url",
53+
image_url: url
54+
}
55+
end
56+
57+
def supports_image?(model_id)
58+
# Check if the model supports vision according to the model capabilities
59+
capabilities.supports_vision?(model_id)
60+
end
61+
62+
def supports_audio?(model_id)
63+
# Check if the model supports audio according to the model capabilities
64+
capabilities.supports_audio?(model_id)
65+
end
66+
67+
def capabilities
68+
RubyLLM::Providers::Mistral::Capabilities
7969
end
8070
end
8171
end

lib/ruby_llm/providers/mistral/models.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# frozen_string_literal: true
22

3+
require 'ruby_llm/model/info'
4+
35
module RubyLLM
46
module Providers
57
module Mistral
@@ -20,7 +22,7 @@ def parse_list_models_response(response, slug, capabilities)
2022
def parse_model(model, slug, capabilities)
2123
id = model["id"]
2224

23-
ModelInfo.new(
25+
ModelInfoWithFunctions.new(
2426
id: id,
2527
created_at: model["created"] ? Time.at(model["created"]) : nil,
2628
display_name: capabilities.format_display_name(id),
@@ -36,6 +38,12 @@ def parse_model(model, slug, capabilities)
3638
capabilities: capabilities.capabilities_for(id),
3739
)
3840
end
41+
42+
class ModelInfoWithFunctions < RubyLLM::Model::Info
43+
def supports_functions?
44+
RubyLLM::Providers::Mistral::Capabilities.supports_functions?(id)
45+
end
46+
end
3947
end
4048
end
4149
end

0 commit comments

Comments
 (0)