Skip to content

Commit 22c1c71

Browse files
committed
image support
1 parent 8de5f22 commit 22c1c71

File tree

5 files changed

+130
-1
lines changed

5 files changed

+130
-1
lines changed

Sources/OpenAIStructure/OpenAIRequest.swift

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,80 @@ public enum OpenAIRequest {
6363
let result = try JSONDecoder().decode(T.self, from: text.data(using: .utf8)!)
6464
return result
6565
}
66+
67+
/// Request body for an OpenAI structured-output call whose input mixes text and
/// an image. `Schema` is the JSON-schema "format" object nested under `text`
/// (the sibling `request` passes `T.schema` here).
///
/// Property and nested-field names deliberately match the API's wire format
/// (e.g. `image_url` stays snake_case) so Codable synthesis needs no CodingKeys.
struct PayloadWithImage<Schema: Codable>: Codable {
    let model: String
    let input: [Input]
    let reasoning: Reasoning
    let instructions: String
    let text: TextObject
    /// Reasoning options; `effort` is nil for non-reasoning models so the
    /// caller can send the same payload shape either way.
    struct Reasoning: Codable {
        let effort: String?
    }
    /// Wrapper for the structured-output schema under the `text` key.
    struct TextObject: Codable {
        let format: Schema
    }
    /// One input message. Role is fixed to "user" by the custom init.
    struct Input: Codable {
        var role: String
        var content: [Content]
        init(content: [Content]) {
            self.role = "user"
            self.content = content
        }
        /// One content part: either an `input_text` part (uses `text`) or an
        /// `input_image` part (uses `detail` + `image_url` as a data URL) —
        /// the unused fields stay nil and are omitted/null on the wire.
        struct Content: Codable {
            var type: String
            var detail: String?
            var image_url: String?
            var text: String?
        }
    }
}
94+
95+
/// Sends a structured-output request that combines a text prompt with an image.
///
/// - Parameters:
///   - input: The user's text prompt.
///   - instructions: System-style instructions for the model.
///   - image: Raw image bytes; sent inline as a base64 `data:image/jpeg` URL.
///     (NOTE(review): the data URL always claims JPEG even if `image` holds
///     PNG bytes — confirm the API tolerates this, or derive the MIME type.)
///   - model: Target model; reasoning effort is forwarded for `o4_mini`.
///   - object: The structured type to decode the model's output into.
///   - apiKey: OpenAI API key used as a Bearer token.
/// - Returns: The decoded `T` parsed from the last text content of the response.
/// - Throws: `OpenAIStructureError` when the response cannot be decoded or
///   contains no text content; any encoding/transport error from Foundation.
public static func request<T: OpenAIStructureObject>(input: String, instructions: String, image: Data, model: OpenAIModel, object: T.Type, apiKey: String) async throws -> T {
    var req = URLRequest(url: endpoint)
    req.httpMethod = "POST"
    req.addValue("application/json", forHTTPHeaderField: "Content-Type")
    req.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")

    // Only the reasoning effort differs between models, so compute it once
    // instead of duplicating the whole payload construction in two branches.
    let effort: String?
    if model.hasReasoning, case let OpenAIModel.o4_mini(reasoningEffort) = model {
        effort = reasoningEffort.rawValue
    } else {
        effort = nil
    }

    let payload = PayloadWithImage<SchemaRequest>(
        model: model.modelName,
        input: [
            .init(
                content: [
                    .init(type: "input_text", text: input),
                    .init(type: "input_image", detail: "high", image_url: "data:image/jpeg;base64," + image.base64EncodedString())
                ]
            )
        ],
        reasoning: .init(effort: effort),
        instructions: instructions,
        text: .init(format: T.schema)
    )
    req.httpBody = try JSONEncoder().encode(payload)
    // Long timeout: image payloads plus reasoning models can be slow.
    req.timeoutInterval = 300

    let (data, _) = try await URLSession.shared.data(for: req)
    guard let decoded = try? JSONDecoder().decode(OpenAIResponse.self, from: data) else {
        throw OpenAIStructureError(message: String(data: data, encoding: .utf8) ?? "error")
    }
    // Throw a descriptive error instead of force-unwrapping: an error-shaped
    // or empty response previously crashed here with `last!.content!.last!.text!`.
    guard let outputText = decoded.output.last?.content?.last?.text,
          let outputData = outputText.data(using: .utf8) else {
        throw OpenAIStructureError(message: "response contained no text content: " + (String(data: data, encoding: .utf8) ?? "error"))
    }
    return try JSONDecoder().decode(T.self, from: outputData)
}
66142
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"info" : {
3+
"author" : "xcode",
4+
"version" : 1
5+
}
6+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"images" : [
3+
{
4+
"filename" : "apple.png",
5+
"idiom" : "universal",
6+
"scale" : "1x"
7+
},
8+
{
9+
"idiom" : "universal",
10+
"scale" : "2x"
11+
},
12+
{
13+
"idiom" : "universal",
14+
"scale" : "3x"
15+
}
16+
],
17+
"info" : {
18+
"author" : "xcode",
19+
"version" : 1
20+
}
21+
}
1.31 MB
Loading

Sources/OpenAIStructureClient/main.swift

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import OpenAIStructure
22
import Foundation
3+
import AppKit
34

45
@OpenAIStructure(name: "talk_theme_pack")
56
struct TalkThemePack {
@@ -51,10 +52,35 @@ do {
5152
instructions: "Provide some talk themes.",
5253
model: .gpt4_1_nano,
5354
object: TalkThemePack.self,
54-
apiKey: "[REDACTED — a live-looking OpenAI secret key was committed here; it remains in git history and must be revoked]"
55+
apiKey: ""
5556
)
5657
print(result)
5758
} catch {
5859
print(error)
5960
}
6061

62+
print("-------image-------")
63+
64+
/// Structured-output schema with a single string field holding a description
/// of the supplied image.
/// NOTE(review): the `@OpenAIStructure`/`@Field` macros presumably synthesize
/// the JSON schema named "describe_image" — confirm against the macro expansion.
@OpenAIStructure(name: "describe_image")
struct ImageDescriber {

    @Field("description of the image")
    var image_description: String
}
70+
71+
do {
    // Load the bundled sample image and re-encode it as PNG bytes.
    // Force-unwraps are acceptable here: a missing bundled resource is a
    // programmer error in this demo executable.
    let cgImage = Bundle.module.image(forResource: "apple")!.cgImage(forProposedRect: nil, context: nil, hints: nil)!
    let rep = NSBitmapImageRep(cgImage: cgImage)
    let imageData = rep.representation(using: .png, properties: [:])!
    // Fix: the prompts were copy-pasted from the talk-theme example and
    // contradicted the ImageDescriber schema, which asks for an image description.
    let result = try await OpenAIRequest.request(
        input: "What is shown in this image?",
        instructions: "Describe the provided image.",
        image: imageData,
        model: .gpt4_1_nano,
        object: ImageDescriber.self,
        apiKey: ""
    )
    print(result)
} catch {
    print(error)
}

0 commit comments

Comments
 (0)