1
1
/*
2
2
3
- This sketch shows the example of image prompts openAI Vision and Gemini Vision API
3
+ This sketch shows an example of sending image prompts to vision-capable LLM APIs.
4
4
5
- openAI Vision
5
+ openAI platform - openAI vision
6
6
https://platform.openai.com/docs/guides/vision
7
7
8
- Gemini Vision
8
+ Google AI Studio - Gemini vision
9
9
https://ai.google.dev/gemini-api/docs/vision
10
10
11
+ GroqCloud - Llama vision
12
+ https://console.groq.com/docs/overview
13
+
11
14
Example Guide: TBD
12
15
13
16
Credit : ChungYi Fu (Kaohsiung, Taiwan)
14
17
15
18
*/
16
19
17
- String openAI_key = " " ; // paste your generated openAI key here
18
- String Gemini_key = " " ; // paste your generated Gemini key here
20
+ String openAI_key = " " ; // paste your generated openAI API key here
21
+ String Gemini_key = " " ; // paste your generated Gemini API key here
22
+ String Llama_key = " " ; // paste your generated Llama API key here
19
23
char wifi_ssid[] = " Network_SSID5" ; // change to your network SSID
20
24
char wifi_pass[] = " Password" ; // change to your network password
21
25
22
26
#include < WiFi.h>
23
- WiFiSSLClient client;
24
- #include < ArduinoJson.h>
25
- #include " Base64.h"
27
+ #include " NNGenAIVision.h"
26
28
#include " VideoStream.h"
29
+ WiFiSSLClient client;
30
+ NNGenAIVision llm;
27
31
VideoSetting config (768 , 768 , CAM_FPS, VIDEO_JPEG, 1 );
28
32
#define CHANNEL 0
29
33
30
34
uint32_t img_addr = 0 ;
31
35
uint32_t img_len = 0 ;
32
36
37
+ String prompt = " Please describe the image, and if there is a text, please summarize the content" ;
38
+
33
39
void initWiFi ()
34
40
{
35
41
for (int i = 0 ; i < 2 ; i++) {
@@ -59,175 +65,6 @@ void initWiFi()
59
65
}
60
66
}
61
67
62
- String SendStillToOpenaiVision (String key, String message, bool capture)
63
- {
64
- const char *myDomain = " api.openai.com" ;
65
- String getResponse = " " , Feedback = " " ;
66
- Serial.println (" Connect to " + String (myDomain));
67
- if (client.connect (myDomain, 443 )) {
68
- Serial.println (" Connection successful" );
69
- if (capture) {
70
- Camera.getImage (0 , &img_addr, &img_len);
71
- }
72
- uint8_t *fbBuf = (uint8_t *)img_addr;
73
- size_t fbLen = img_len;
74
-
75
- char *input = (char *)fbBuf;
76
- char output[base64_enc_len (3 )];
77
- String imageFile = " data:image/jpeg;base64," ;
78
- for (int i = 0 ; i < fbLen; i++) {
79
- base64_encode (output, (input++), 3 );
80
- if (i % 3 == 0 ) {
81
- imageFile += String (output);
82
- }
83
- }
84
- String Data = " {\" model\" : \" gpt-4o-mini\" , \" messages\" : [{\" role\" : \" user\" ,\" content\" : [{ \" type\" : \" text\" , \" text\" : \" " + message + " \" },{\" type\" : \" image_url\" , \" image_url\" : {\" url\" : \" " + imageFile + " \" }}]}]}" ;
85
-
86
- client.println (" POST /v1/chat/completions HTTP/1.1" );
87
- client.println (" Host: " + String (myDomain));
88
- client.println (" Authorization: Bearer " + key);
89
- client.println (" Content-Type: application/json; charset=utf-8" );
90
- client.println (" Content-Length: " + String (Data.length ()));
91
- client.println (" Connection: close" );
92
- client.println ();
93
-
94
- int Index;
95
- for (Index = 0 ; Index < Data.length (); Index = Index + 1024 ) {
96
- client.print (Data.substring (Index, Index + 1024 ));
97
- }
98
-
99
- int waitTime = 10000 ;
100
- long startTime = millis ();
101
- boolean state = false ;
102
- boolean markState = false ;
103
- while ((startTime + waitTime) > millis ()) {
104
- Serial.print (" ." );
105
- delay (100 );
106
- while (client.available ()) {
107
- char c = client.read ();
108
- if (String (c) == " {" ) {
109
- markState = true ;
110
- }
111
- if (state == true && markState == true ) {
112
- Feedback += String (c);
113
- }
114
- if (c == ' \n ' ) {
115
- if (getResponse.length () == 0 ) {
116
- state = true ;
117
- }
118
- getResponse = " " ;
119
- } else if (c != ' \r ' ) {
120
- getResponse += String (c);
121
- }
122
- startTime = millis ();
123
- }
124
- if (Feedback.length () > 0 ) {
125
- break ;
126
- }
127
- }
128
- Serial.println ();
129
- client.stop ();
130
-
131
- JsonObject obj;
132
- DynamicJsonDocument doc (4096 );
133
- deserializeJson (doc, Feedback);
134
- obj = doc.as <JsonObject>();
135
- getResponse = obj[" choices" ][0 ][" message" ][" content" ].as <String>();
136
- if (getResponse == " null" ) {
137
- getResponse = obj[" error" ][" message" ].as <String>();
138
- }
139
- } else {
140
- getResponse = " Connected to " + String (myDomain) + " failed." ;
141
- Serial.println (" Connected to " + String (myDomain) + " failed." );
142
- }
143
-
144
- return getResponse;
145
- }
146
-
147
- String SendStillToGeminiVision (String key, String message, bool capture)
148
- {
149
- const char *myDomain = " generativelanguage.googleapis.com" ;
150
- String getResponse = " " , Feedback = " " ;
151
- Serial.println (" Connect to " + String (myDomain));
152
- if (client.connect (myDomain, 443 )) {
153
- Serial.println (" Connection successful" );
154
- if (capture) {
155
- Camera.getImage (0 , &img_addr, &img_len);
156
- }
157
- uint8_t *fbBuf = (uint8_t *)img_addr;
158
- size_t fbLen = img_len;
159
-
160
- char *input = (char *)fbBuf;
161
- char output[base64_enc_len (3 )];
162
- String imageFile = " " ;
163
- for (int i = 0 ; i < fbLen; i++) {
164
- base64_encode (output, (input++), 3 );
165
- if (i % 3 == 0 ) {
166
- imageFile += String (output);
167
- }
168
- }
169
- String Data = " {\" contents\" : [{\" parts\" : [{\" text\" : \" " + message + " \" }, {\" inline_data\" : {\" mime_type\" :\" image/jpeg\" ,\" data\" :\" " + imageFile + " \" }}]}]}" ;
170
-
171
- client.println (" POST /v1beta/models/gemini-1.5-flash-latest:generateContent?key=" + key + " HTTP/1.1" );
172
- client.println (" Host: " + String (myDomain));
173
- client.println (" Content-Type: application/json; charset=utf-8" );
174
- client.println (" Content-Length: " + String (Data.length ()));
175
- client.println (" Connection: close" );
176
- client.println ();
177
-
178
- int Index;
179
- for (Index = 0 ; Index < Data.length (); Index = Index + 1024 ) {
180
- client.print (Data.substring (Index, Index + 1024 ));
181
- }
182
-
183
- int waitTime = 10000 ;
184
- long startTime = millis ();
185
- boolean state = false ;
186
- boolean markState = false ;
187
- while ((startTime + waitTime) > millis ()) {
188
- Serial.print (" ." );
189
- delay (100 );
190
- while (client.available ()) {
191
- char c = client.read ();
192
- if (String (c) == " {" ) {
193
- markState = true ;
194
- }
195
- if (state == true && markState == true ) {
196
- Feedback += String (c);
197
- }
198
- if (c == ' \n ' ) {
199
- if (getResponse.length () == 0 ) {
200
- state = true ;
201
- }
202
- getResponse = " " ;
203
- } else if (c != ' \r ' ) {
204
- getResponse += String (c);
205
- }
206
- startTime = millis ();
207
- }
208
- if (Feedback.length () > 0 ) {
209
- break ;
210
- }
211
- }
212
- Serial.println ();
213
- client.stop ();
214
-
215
- JsonObject obj;
216
- DynamicJsonDocument doc (4096 );
217
- deserializeJson (doc, Feedback);
218
- obj = doc.as <JsonObject>();
219
- getResponse = obj[" candidates" ][0 ][" content" ][" parts" ][0 ][" text" ].as <String>();
220
- if (getResponse == " null" ) {
221
- getResponse = obj[" error" ][" message" ].as <String>();
222
- }
223
- } else {
224
- getResponse = " Connected to " + String (myDomain) + " failed." ;
225
- Serial.println (" Connected to " + String (myDomain) + " failed." );
226
- }
227
-
228
- return getResponse;
229
- }
230
-
231
68
void setup ()
232
69
{
233
70
Serial.begin (115200 );
@@ -244,15 +81,15 @@ void setup()
244
81
245
82
// Capture one image; the vision prompts below all reuse this same captured image
246
83
Camera.getImage (0 , &img_addr, &img_len);
247
- Serial.println ((SendStillToOpenaiVision (openAI_key, " Please describe the image, and if there is text, please summarize the content" , 0 )));
248
- Serial.println ((SendStillToGeminiVision (Gemini_key, " Please describe the image, and if there is text, please summarize the content" , 0 )));
249
84
250
- /*
251
- // Vision prompt using different image
252
- Serial.println((SendStillToOpenaiVision(openAI_key, "Please describe the image, and if there is text, please summarize the content", 1)));
253
- delay(5000);
254
- Serial.println((SendStillToGeminiVision(Gemini_key, "Please describe the image, and if there is text, please summarize the content", 1)));
255
- */
85
+ // openAI vision prompt
86
+ // Serial.println((llm.openaivision(openAI_key, prompt, img_addr, img_len, client)));
87
+
88
+ // Gemini vision prompt
89
+ // Serial.println((llm.geminivision(Gemini_key, prompt, img_addr, img_len, client)));
90
+
91
+ // Llama vision prompt
92
+ Serial.println ((llm.llamavision (Llama_key, prompt, img_addr, img_len, client)));
256
93
}
257
94
258
95
void loop ()
0 commit comments