Skip to content

Commit 03b078b

Browse files
authored
Merge pull request #2744 from adafruit/memento_openai
adding code and prompts for memento openai project
2 parents 85e851d + 6564b69 commit 03b078b

File tree

2 files changed

+250
-0
lines changed

2 files changed

+250
-0
lines changed
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
# SPDX-FileCopyrightText: 2024 Liz Clark for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import os
6+
import time
7+
import ssl
8+
import binascii
9+
import wifi
10+
import vectorio
11+
import socketpool
12+
import adafruit_requests
13+
import displayio
14+
from jpegio import JpegDecoder
15+
from adafruit_display_text import label, wrap_text_to_lines
16+
import terminalio
17+
import adafruit_pycamera
18+
19+
# magnification applied to the label that shows OpenAI's reply
text_scale = 2

# the API key and every prompt string are read from settings.toml
openai_api_key = os.getenv("OPENAI_API_KEY")
alt_text_prompt = os.getenv("ALT_TEXT_PROMPT")
haiku_prompt = os.getenv("HAIKU_PROMPT")
cable_prompt = os.getenv("CABLE_PROMPT")
translate_prompt = os.getenv("TRANSLATE_PROMPT")
alien_prompt = os.getenv("ALIEN_PROMPT")
weird_prompt = os.getenv("WEIRD_PROMPT")

# prompts and prompt_labels are parallel lists: prompt_index selects the
# same position in both (the label is what gets shown and used in file names)
prompt_labels = [
    "ALT_TEXT",
    "HAIKU",
    "CABLE_IDENTIFIER",
    "TRANSLATE",
    "ALIEN",
    "WEIRD?",
]
prompts = [
    alt_text_prompt,
    haiku_prompt,
    cable_prompt,
    translate_prompt,
    alien_prompt,
    weird_prompt,
]
num_prompts = len(prompts)
# position of the currently selected prompt
prompt_index = 0
40+
41+
# encode jpeg to base64 for OpenAI
42+
def encode_image(image_path):
    """Return the file at ``image_path`` as a base64 text string.

    The whole file is read in binary mode, base64-encoded, decoded to text as
    UTF-8 and stripped of trailing whitespace (``b2a_base64`` ends its output
    with a newline).
    """
    with open(image_path, 'rb') as jpeg:
        raw_bytes = jpeg.read()
    encoded = binascii.b2a_base64(raw_bytes)
    return encoded.decode('utf-8').rstrip()
47+
48+
# view returned text on MEMENTO screen
49+
def view_text(the_text):
    """Draw ``the_text`` on the display on top of a 240x240 backdrop.

    Two items are appended to ``pycam.splash`` in this order: a rectangle
    drawn with the module-level ``palette``, then the text label.  The text
    is wrapped at 20 characters per line; when the prompt at index 1 is
    selected every ``*`` in the wrapped text is turned into a line break.
    """
    backdrop = vectorio.Rectangle(pixel_shader=palette, width=240, height=240, x=0, y=0)
    pycam.splash.append(backdrop)

    wrapped = "\n".join(wrap_text_to_lines(the_text, 20))
    if prompt_index == 1:
        wrapped = wrapped.replace("*", "\n")

    reply_label = label.Label(
        terminalio.FONT,
        text=wrapped,
        color=0xFFFFFF,
        x=2,
        y=10,
        scale=text_scale,
    )
    pycam.splash.append(reply_label)
    pycam.display.refresh()
59+
60+
# send image to OpenAI, print the returned text and save it as a text file
61+
def _reply_file_path(img, prompt_label):
    """Return the path of the text file that stores a reply for image ``img``.

    The image extension (whatever its case or length) is dropped, then
    ``_<prompt_label>`` and ``.txt`` are added, so ``/sd/img0001.jpg`` with the
    label ``HAIKU`` gives ``/sd/img0001_HAIKU.txt``.  Question marks are
    removed from the label before it is used in the file name.
    """
    dot = img.rfind(".")
    # only treat the dot as an extension separator if it is in the file name,
    # not in one of the directories leading to it
    stem = img[:dot] if dot > img.rfind("/") else img
    safe_label = prompt_label.replace("?", "")
    return f"{stem}_{safe_label}.txt"


def send_img(img, prompt):
    """Send the JPEG at path ``img`` plus ``prompt`` to OpenAI and show the reply.

    The image is base64-encoded and posted to the chat completions endpoint.
    On success the reply text is printed, appended to a text file next to the
    image (named after the currently selected prompt label) and displayed with
    ``view_text``.  When the response has no usable reply text (for example an
    API error body), nothing is written to disk and an error message is
    displayed instead; ``view_text`` is still called exactly once so the
    display is left in the same state either way.

    :param img: path of the JPEG file to send
    :param prompt: instruction text sent along with the image
    """
    base64_image = encode_image(img)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }
    payload = {
        # NOTE(review): preview model names get retired over time; confirm
        # this one is still served before relying on it
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"{prompt}"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }
    response = requests.post("https://api.openai.com/v1/chat/completions",
                             headers=headers, json=payload)
    try:
        json_openai = response.json()
    finally:
        # release the connection even if the body is not valid JSON
        response.close()

    try:
        reply = json_openai['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        reply = None

    if not isinstance(reply, str):
        # no reply text: dump the body for debugging over serial and show
        # whatever error description the body carries
        print(json_openai)
        error = json_openai.get("error") if isinstance(json_openai, dict) else None
        detail = error.get("message") if isinstance(error, dict) else None
        view_text(f"OpenAI error: {detail}" if detail else "OpenAI error")
        return

    print(reply)
    alt_text_file = _reply_file_path(img, prompt_labels[prompt_index])
    # append mode: sending the same image/prompt pair again adds to the file
    with open(alt_text_file, "a") as fp:
        fp.write(reply)
        fp.flush()
    # pause kept from the original flow — presumably gives the SD card time
    # to finish the write; TODO confirm it is still needed
    time.sleep(1)
    view_text(reply)
102+
# view images on sd card to re-send to OpenAI
103+
def load_image(bit, file):
    """Decode the JPEG ``file`` into the bitmap ``bit`` and show it.

    ``bit`` is cleared first, the module-level ``decoder`` opens and decodes
    the file into it at position (0, 0) with ``scale=0``, and the result is
    blitted to the display 32 pixels down before the display is refreshed.
    """
    # all bits are zero in this 5-6-5 grouped literal, i.e. the bitmap is
    # blanked to black (presumably RGB565), not to grey
    bit.fill(0b00000_000000_00000)
    decoder.open(file)
    decoder.decode(bit, scale=0, x=0, y=0)
    pycam.blit(bit, y_offset=32)
    pycam.display.refresh()
109+
110+
print()
print("Connecting to WiFi")
# network credentials are read from settings.toml
wifi.radio.connect(os.getenv('CIRCUITPY_WIFI_SSID'), os.getenv('CIRCUITPY_WIFI_PASSWORD'))
print("Connected to WiFi")
pool = socketpool.SocketPool(wifi.radio)
# HTTPS session used by send_img()
requests = adafruit_requests.Session(pool, ssl.create_default_context())

# one-colour (black) palette for the rectangles drawn behind text
palette = displayio.Palette(1)
palette[0] = 0x000000
decoder = JpegDecoder()
# used for showing images from sd card
bitmap = displayio.Bitmap(240, 176, 65535)

pycam = adafruit_pycamera.PyCamera()
pycam.mode = 0  # only mode 0 (JPEG) will work in this example

# Resolution of 320x240 is plenty for OpenAI
pycam.resolution = 1  # 0-12 preset resolutions:
#                      0: 240x240, 1: 320x240, 2: 640x480, 3: 800x600, 4: 1024x768,
#                      5: 1280x720, 6: 1280x1024, 7: 1600x1200, 8: 1920x1080, 9: 2048x1536,
#                      10: 2560x1440, 11: 2560x1600, 12: 2560x1920
# pycam.led_level = 1  # 0-4 preset brightness levels
# pycam.led_color = 0  # 0-7 preset colors: 0: white, 1: green, 2: yellow, 3: red,
#                                          4: pink, 5: blue, 6: teal, 7: rainbow
pycam.effect = 0  # 0-7 preset FX: 0: normal, 1: invert, 2: b&w, 3: red,
#                                  4: green, 5: blue, 6: sepia, 7: solarize

# sort image files by numeric order
try:
    all_images = [
        f"/sd/{filename}"
        for filename in os.listdir("/sd")
        if filename.lower().endswith(".jpg")
    ]
except OSError:
    # /sd could not be listed (e.g. no card inserted): start with no images
    # instead of stopping the program before the camera UI comes up
    all_images = []
# a name without any digit yields '' which int() rejects, so such files are
# given the key 0 and sort ahead of the numbered ones
all_images.sort(key=lambda f: int(''.join(filter(str.isdigit, f)) or "0"))

# add label for selected prompt
rect = vectorio.Rectangle(pixel_shader=palette, width=240, height=20, x=0, y=0)
prompt_txt = label.Label(
    terminalio.FONT, text=prompt_labels[prompt_index], color=0xFF0055, x=10, y=15, scale=2
)
# pylint: disable=protected-access
pycam._botbar.append(rect)
pycam._botbar.append(prompt_txt)
# pylint: enable=protected-access
pycam.display.refresh()

# view: a reply from OpenAI is currently shown on screen
view = False
# new_prompt: a stored image is being browsed for re-sending
new_prompt = False
# index into all_images of the stored image being browsed (-1 = newest)
file_index = -1
157+
158+
# Main UI loop.
#   shutter (short press): take a photo and send it with the selected prompt
#   shutter (long press):  autofocus
#   up / down:             change the selected prompt
#   left / right:          change the prompt, or step through stored images
#                          while one is being browsed
#   select:                start / stop browsing stored images
#   ok:                    dismiss a displayed reply, or send the browsed image
while True:
    if new_prompt:
        pycam.display_message("SEND?")
    if not view and not new_prompt:
        # live viewfinder, only while neither a reply nor a stored image
        # is occupying the screen
        pycam.blit(pycam.continuous_capture())

    pycam.keys_debounce()

    if pycam.shutter.long_press:
        pycam.autofocus()

    if pycam.shutter.short_count:
        try:
            pycam.display_message("snap", color=0x00DD00)
            pycam.capture_jpeg()
            pycam.live_preview_mode()
        except TypeError:
            pycam.display_message("Failed", color=0xFF0000)
            time.sleep(0.5)
            pycam.live_preview_mode()
        except RuntimeError:
            pycam.display_message("Error\nNo SD Card", color=0xFF0000)
            time.sleep(0.5)
        else:
            # only reached when the capture succeeded, so an older photo is
            # never uploaded in place of one that failed to save
            all_images = [
                f"/sd/{filename}"
                for filename in os.listdir("/sd")
                if filename.lower().endswith(".jpg")
            ]
            # names without digits get key 0 instead of making int('') raise
            all_images.sort(key=lambda f: int(''.join(filter(str.isdigit, f)) or "0"))
            if all_images:
                # highest-numbered file = the photo that was just taken
                the_image = all_images[-1]
                pycam.display_message("OpenAI..", color=0x00DD00)
                send_img(the_image, prompts[prompt_index])
                view = True

    if pycam.up.fell:
        prompt_index = (prompt_index - 1) % num_prompts
        prompt_txt.text = prompt_labels[prompt_index]
        pycam.display.refresh()

    if pycam.down.fell:
        prompt_index = (prompt_index + 1) % num_prompts
        prompt_txt.text = prompt_labels[prompt_index]
        pycam.display.refresh()

    if pycam.right.fell:
        if new_prompt:
            # new_prompt is only ever set while all_images is non-empty,
            # so the modulo below cannot divide by zero
            file_index = (file_index + 1) % len(all_images)
            filename = all_images[file_index]
            load_image(bitmap, filename)
        else:
            prompt_index = (prompt_index + 1) % num_prompts
            prompt_txt.text = prompt_labels[prompt_index]
            pycam.display.refresh()

    if pycam.left.fell:
        if new_prompt:
            file_index = (file_index - 1) % len(all_images)
            filename = all_images[file_index]
            load_image(bitmap, filename)
        else:
            prompt_index = (prompt_index - 1) % num_prompts
            prompt_txt.text = prompt_labels[prompt_index]
            pycam.display.refresh()

    if pycam.select.fell:
        if new_prompt:
            new_prompt = False
            pycam.display.refresh()
        elif all_images:
            # start browsing from the newest stored image
            file_index = -1
            new_prompt = True
            filename = all_images[file_index]
            load_image(bitmap, filename)
        else:
            # nothing stored yet: report it instead of indexing an empty list
            pycam.display_message("No images", color=0xFF0000)
            time.sleep(0.5)

    if pycam.ok.fell:
        if view:
            # remove the two items view_text() appended (label, then backdrop)
            pycam.splash.pop()
            pycam.splash.pop()
            pycam.display.refresh()
            view = False
        if new_prompt:
            pycam.display_message("OpenAI..", color=0x00DD00)
            send_img(filename, prompts[prompt_index])
            new_prompt = False
            view = True
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# SPDX-FileCopyrightText: 2024 Liz Clark for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
ALT_TEXT_PROMPT="Please describe what is in this image. The description should be in the style of alt text used on social media sites like Mastodon. Avoid talking about the quality of the image. Avoid talking about the background of the image. Only describe the main subject."
6+
HAIKU_PROMPT="Please identify the main subject of the image and write a haiku about it. It should be a three line poem that consists of three lines, with five syllables in the first line, seven in the second, and five in the third. It does not need to rhyme. Only return the haiku, no image description is necessary. Please insert a * between each line instead of a comma (,)."
7+
CABLE_PROMPT="This image features a type of cable connector. Please specifically identify the cable connection. You should describe the category of the cable and the specific type. For example, if you see a USB cable, you should also say whether it is type A, type mini B, etc. If you are not sure what type of cable connector it is, please state that and make your best inference. The response should be concise, no more than two sentences. You do not need to describe the general appearance of the image."
8+
TRANSLATE_PROMPT="This image has text that is not in English. Please translate it to English. Only return the translated text in English. For example, if the text reads hola (hello in Spanish), then return hello (translated to English). No further explanation is necessary. If you are not sure on how to translate a word(s) then please return the text as seen in the image. You do not need to describe the image."
9+
ALIEN_PROMPT="Describe this image as if you are a friendly alien visiting from another world in our galaxy. You have no context for society on Earth. Your descriptions are cute, inquisitive and concise (2-3 sentences). You do not need to comment on the quality of the image."
10+
WEIRD_PROMPT="Take in the full image as context. Is there anything in the image that is out of the ordinary or interesting? For example, if the image is of a backyard, do you see any animals or objects that catch your eye? You do not need to comment on the quality of the image. Please keep responses brief and concise, between one to two sentences. If you do not see anything out of the ordinary for the context of the image or anything particularly interesting, please return: Nothing to see here."

0 commit comments

Comments
 (0)