Skip to content

Commit 773aaec

Browse files
committed
Add GenAIVision Example
1 parent 4662360 commit 773aaec

File tree

1 file changed

+261
-0
lines changed
  • Arduino_package/hardware/libraries/NeuralNetwork/examples/GenAIVision

1 file changed

+261
-0
lines changed
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
/*
2+
3+
This sketch shows an example of sending image prompts to the OpenAI Vision and Gemini Vision APIs
4+
5+
openAI Vision
6+
https://platform.openai.com/docs/guides/vision
7+
8+
Gemini Vision
9+
https://ai.google.dev/gemini-api/docs/vision
10+
11+
Example Guide: TBD
12+
13+
Credit : ChungYi Fu (Kaohsiung, Taiwan)
14+
15+
*/
16+
17+
// ---- User configuration: API credentials and WiFi network ----
String openAI_key = ""; // paste your generated openAI key here
18+
String Gemini_key = ""; // paste your generated Gemini key here
19+
char wifi_ssid[] = "Network_SSID5"; // change to your network SSID
20+
char wifi_pass[] = "Password"; // change to your network password
21+
22+
#include <WiFi.h>
23+
// Single SSL client instance; both SendStillTo*Vision functions connect
// with it on port 443 and stop it when the response has been read.
WiFiSSLClient client;
24+
#include <ArduinoJson.h>
25+
#include "Base64.h"
26+
#include "VideoStream.h"
27+
// Video settings applied to the camera channel in setup(): 768x768, CAM_FPS,
// JPEG output. NOTE(review): the meaning of the trailing `1` is not visible
// in this file -- confirm against VideoStream.h.
VideoSetting config(768, 768, CAM_FPS, VIDEO_JPEG, 1);
28+
// Camera channel configured and started in setup().
#define CHANNEL 0
29+
30+
// Filled in by Camera.getImage(); afterwards img_addr is used as a pointer to
// the image bytes and img_len as their length when building the request body.
uint32_t img_addr = 0;
31+
uint32_t img_len = 0;
32+
33+
void initWiFi()
34+
{
35+
for (int i = 0; i < 2; i++) {
36+
WiFi.begin(wifi_ssid, wifi_pass);
37+
38+
delay(1000);
39+
Serial.println("");
40+
Serial.print("Connecting to ");
41+
Serial.println(wifi_ssid);
42+
43+
long int StartTime = millis();
44+
while (WiFi.status() != WL_CONNECTED) {
45+
delay(500);
46+
if ((StartTime + 5000) < millis()) {
47+
break;
48+
}
49+
}
50+
51+
if (WiFi.status() == WL_CONNECTED) {
52+
Serial.println("");
53+
Serial.println("STAIP address: ");
54+
Serial.println(WiFi.localIP());
55+
Serial.println("");
56+
57+
break;
58+
}
59+
}
60+
}
61+
62+
/*
 * Sends a JPEG image together with a text prompt to the OpenAI
 * chat-completions endpoint (model "gpt-4o-mini") and returns the reply text.
 *
 * Parameters:
 *   key     - OpenAI API key, sent in the "Authorization: Bearer" header.
 *   message - prompt text. It is inserted into the JSON body verbatim, so it
 *             must not contain unescaped '"' or '\' characters.
 *   capture - true:  call Camera.getImage() first to refresh img_addr/img_len;
 *             false: send the image currently described by img_addr/img_len.
 *
 * Returns:
 *   choices[0].message.content from the JSON response; if that is absent,
 *   error.message from the response; a "No response from ..." string when no
 *   body arrived before the timeout; or a "Connected to ... failed." string
 *   when the TLS connection could not be opened.
 */
String SendStillToOpenaiVision(String key, String message, bool capture)
{
    const char *myDomain = "api.openai.com";
    String getResponse = "", Feedback = "";
    Serial.println("Connect to " + String(myDomain));
    if (client.connect(myDomain, 443)) {
        Serial.println("Connection successful");
        if (capture) {
            Camera.getImage(0, &img_addr, &img_len);
        }
        char *input = (char *)img_addr;
        size_t fbLen = img_len;

        // Base64-encode the image as a data URL, one 3-byte group at a time.
        // The final group is encoded with its real length (1..3 bytes) so no
        // bytes past the end of the image buffer are read and the encoder can
        // emit the correct '=' padding.
        // 4 output characters per group + 1 for the terminating NUL.
        char output[5];
        String imageFile = "data:image/jpeg;base64,";
        // Pre-size the string to avoid repeated reallocation while appending.
        imageFile.reserve(imageFile.length() + ((fbLen + 2) / 3) * 4);
        for (size_t offset = 0; offset < fbLen; offset += 3) {
            size_t remaining = fbLen - offset;
            int groupLen = (remaining < 3) ? (int)remaining : 3;
            base64_encode(output, input + offset, groupLen);
            imageFile += output;
        }
        String Data = "{\"model\": \"gpt-4o-mini\", \"messages\": [{\"role\": \"user\",\"content\": [{ \"type\": \"text\", \"text\": \"" + message + "\"},{\"type\": \"image_url\", \"image_url\": {\"url\": \"" + imageFile + "\"}}]}]}";

        client.println("POST /v1/chat/completions HTTP/1.1");
        client.println("Host: " + String(myDomain));
        client.println("Authorization: Bearer " + key);
        client.println("Content-Type: application/json; charset=utf-8");
        client.println("Content-Length: " + String(Data.length()));
        client.println("Connection: close");
        client.println();

        // Send the body in 1024-character slices.
        for (unsigned int index = 0; index < Data.length(); index += 1024) {
            client.print(Data.substring(index, index + 1024));
        }

        // Read the response. The timeout is an inactivity timeout: it is
        // restarted every time a byte is received. Elapsed time is computed
        // with unsigned subtraction so it is safe across millis() rollover.
        const unsigned long waitTime = 10000;
        unsigned long startTime = millis();
        boolean headersDone = false;   // set once the blank line ending the headers is seen
        boolean bodyStarted = false;   // set at the first '{' after the headers
        String headerLine = "";        // characters of the line currently being read
        while ((millis() - startTime) < waitTime) {
            Serial.print(".");
            delay(100);
            while (client.available()) {
                char c = client.read();
                // Only look for the opening brace once the headers are over,
                // so a '{' inside a header value cannot start the capture early.
                if (headersDone && c == '{') {
                    bodyStarted = true;
                }
                if (headersDone && bodyStarted) {
                    Feedback += c;
                }
                if (c == '\n') {
                    if (headerLine.length() == 0) {
                        headersDone = true;
                    }
                    headerLine = "";
                } else if (c != '\r') {
                    headerLine += c;
                }
                startTime = millis();
            }
            if (Feedback.length() > 0) {
                break;
            }
        }
        Serial.println();
        client.stop();

        if (Feedback.length() == 0) {
            // Nothing that looks like a JSON body arrived before the timeout.
            getResponse = "No response from " + String(myDomain) + ".";
        } else {
            JsonObject obj;
            DynamicJsonDocument doc(4096);
            deserializeJson(doc, Feedback);
            obj = doc.as<JsonObject>();
            getResponse = obj["choices"][0]["message"]["content"].as<String>();
            if (getResponse == "null") {
                // No completion text: report the API's error message instead.
                getResponse = obj["error"]["message"].as<String>();
            }
        }
    } else {
        getResponse = "Connected to " + String(myDomain) + " failed.";
        Serial.println("Connected to " + String(myDomain) + " failed.");
    }

    return getResponse;
}
146+
147+
/*
 * Sends a JPEG image together with a text prompt to the Gemini
 * generateContent endpoint (model "gemini-1.5-flash-latest") and returns the
 * reply text.
 *
 * Parameters:
 *   key     - Gemini API key, sent as the "key" query parameter of the URL.
 *   message - prompt text. It is inserted into the JSON body verbatim, so it
 *             must not contain unescaped '"' or '\' characters.
 *   capture - true:  call Camera.getImage() first to refresh img_addr/img_len;
 *             false: send the image currently described by img_addr/img_len.
 *
 * Returns:
 *   candidates[0].content.parts[0].text from the JSON response; if that is
 *   absent, error.message from the response; a "No response from ..." string
 *   when no body arrived before the timeout; or a "Connected to ... failed."
 *   string when the TLS connection could not be opened.
 */
String SendStillToGeminiVision(String key, String message, bool capture)
{
    const char *myDomain = "generativelanguage.googleapis.com";
    String getResponse = "", Feedback = "";
    Serial.println("Connect to " + String(myDomain));
    if (client.connect(myDomain, 443)) {
        Serial.println("Connection successful");
        if (capture) {
            Camera.getImage(0, &img_addr, &img_len);
        }
        char *input = (char *)img_addr;
        size_t fbLen = img_len;

        // Base64-encode the image, one 3-byte group at a time. The final
        // group is encoded with its real length (1..3 bytes) so no bytes past
        // the end of the image buffer are read and the encoder can emit the
        // correct '=' padding.
        // 4 output characters per group + 1 for the terminating NUL.
        char output[5];
        String imageFile = "";
        // Pre-size the string to avoid repeated reallocation while appending.
        imageFile.reserve(((fbLen + 2) / 3) * 4);
        for (size_t offset = 0; offset < fbLen; offset += 3) {
            size_t remaining = fbLen - offset;
            int groupLen = (remaining < 3) ? (int)remaining : 3;
            base64_encode(output, input + offset, groupLen);
            imageFile += output;
        }
        String Data = "{\"contents\": [{\"parts\": [{\"text\": \"" + message + "\"}, {\"inline_data\": {\"mime_type\":\"image/jpeg\",\"data\":\"" + imageFile + "\"}}]}]}";

        client.println("POST /v1beta/models/gemini-1.5-flash-latest:generateContent?key=" + key + " HTTP/1.1");
        client.println("Host: " + String(myDomain));
        client.println("Content-Type: application/json; charset=utf-8");
        client.println("Content-Length: " + String(Data.length()));
        client.println("Connection: close");
        client.println();

        // Send the body in 1024-character slices.
        for (unsigned int index = 0; index < Data.length(); index += 1024) {
            client.print(Data.substring(index, index + 1024));
        }

        // Read the response. The timeout is an inactivity timeout: it is
        // restarted every time a byte is received. Elapsed time is computed
        // with unsigned subtraction so it is safe across millis() rollover.
        const unsigned long waitTime = 10000;
        unsigned long startTime = millis();
        boolean headersDone = false;   // set once the blank line ending the headers is seen
        boolean bodyStarted = false;   // set at the first '{' after the headers
        String headerLine = "";        // characters of the line currently being read
        while ((millis() - startTime) < waitTime) {
            Serial.print(".");
            delay(100);
            while (client.available()) {
                char c = client.read();
                // Only look for the opening brace once the headers are over,
                // so a '{' inside a header value cannot start the capture early.
                if (headersDone && c == '{') {
                    bodyStarted = true;
                }
                if (headersDone && bodyStarted) {
                    Feedback += c;
                }
                if (c == '\n') {
                    if (headerLine.length() == 0) {
                        headersDone = true;
                    }
                    headerLine = "";
                } else if (c != '\r') {
                    headerLine += c;
                }
                startTime = millis();
            }
            if (Feedback.length() > 0) {
                break;
            }
        }
        Serial.println();
        client.stop();

        if (Feedback.length() == 0) {
            // Nothing that looks like a JSON body arrived before the timeout.
            getResponse = "No response from " + String(myDomain) + ".";
        } else {
            JsonObject obj;
            DynamicJsonDocument doc(4096);
            deserializeJson(doc, Feedback);
            obj = doc.as<JsonObject>();
            getResponse = obj["candidates"][0]["content"]["parts"][0]["text"].as<String>();
            if (getResponse == "null") {
                // No generated text: report the API's error message instead.
                getResponse = obj["error"]["message"].as<String>();
            }
        }
    } else {
        getResponse = "Connected to " + String(myDomain) + " failed.";
        Serial.println("Connected to " + String(myDomain) + " failed.");
    }

    return getResponse;
}
230+
231+
void setup()
232+
{
233+
Serial.begin(115200);
234+
235+
initWiFi();
236+
237+
config.setRotation(0);
238+
Camera.configVideoChannel(CHANNEL, config);
239+
Camera.videoInit();
240+
Camera.channelBegin(CHANNEL);
241+
Camera.printInfo();
242+
243+
delay(5000);
244+
245+
// Vision prompt using same taken image
246+
Camera.getImage(0, &img_addr, &img_len);
247+
Serial.println((SendStillToOpenaiVision(openAI_key, "Please describe the image, and if there is text, please summarize the content", 0)));
248+
Serial.println((SendStillToGeminiVision(Gemini_key, "Please describe the image, and if there is text, please summarize the content", 0)));
249+
250+
/*
251+
// Vision prompt using different image
252+
Serial.println((SendStillToOpenaiVision(openAI_key, "Please describe the image, and if there is text, please summarize the content", 1)));
253+
delay(5000);
254+
Serial.println((SendStillToGeminiVision(Gemini_key, "Please describe the image, and if there is text, please summarize the content", 1)));
255+
*/
256+
}
257+
258+
/*
 * Main loop. Intentionally empty: the sketch does all of its work once in
 * setup().
 */
void loop()
{
    // do nothing
}

0 commit comments

Comments
 (0)