Skip to content

Commit 494cf6b

Browse files
committed
Rewrote listener for Magic Storybook and added desktop icon
1 parent f7e8bbe commit 494cf6b

File tree

4 files changed

+61
-92
lines changed

4 files changed

+61
-92
lines changed
29.3 KB
Loading

Magic_AI_Storybook/listener.py

Lines changed: 34 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,40 @@
22
#
33
# SPDX-License-Identifier: MIT
44

5-
from datetime import datetime, timedelta
6-
from queue import Queue
5+
import time
76

87
import speech_recognition as sr
98

109

1110
class Listener:
12-
def __init__(self, energy_threshold=1000, phrase_timeout=3.0, record_timeout=30):
11+
def __init__(self, api_key, energy_threshold=300, record_timeout=30):
1312
self.listener_handle = None
13+
self.microphone = sr.Microphone()
1414
self.recognizer = sr.Recognizer()
1515
self.recognizer.energy_threshold = energy_threshold
16-
self.recognizer.dynamic_energy_threshold = False
17-
self.recognizer.pause_threshold = 1
18-
self.last_sample = bytes()
19-
self.phrase_time = datetime.utcnow()
20-
self.phrase_timeout = phrase_timeout
16+
with self.microphone as source:
17+
self.recognizer.adjust_for_ambient_noise(source) # we only need to calibrate once, before we start listening
2118
self.record_timeout = record_timeout
22-
self.phrase_complete = False
23-
# Thread safe Queue for passing data from the threaded recording callback.
24-
self.data_queue = Queue()
25-
self.mic_dev_index = None
19+
self.listener_handle = None
20+
self.audio = None
21+
self.api_key = api_key
2622

2723
def listen(self, ready_callback=None):
28-
self.phrase_complete = False
29-
start = datetime.utcnow()
30-
self.start_listening()
24+
self._start_listening()
3125
if ready_callback:
3226
ready_callback()
3327
while (
3428
self.listener_handle
35-
and not self.speech_waiting()
36-
or not self.phrase_complete
29+
and self.audio is None
3730
):
38-
if self.phrase_time and start - self.phrase_time > timedelta(
39-
seconds=self.phrase_timeout
40-
):
41-
self.last_sample = bytes()
42-
self.phrase_complete = True
43-
self.phrase_time = start
31+
time.sleep(0.1)
4432
self.stop_listening()
4533

46-
def start_listening(self):
47-
if not self.listener_handle:
48-
with sr.Microphone() as source:
49-
self.recognizer.adjust_for_ambient_noise(source)
50-
self.listener_handle = self.recognizer.listen_in_background(
51-
sr.Microphone(),
52-
self.record_callback,
53-
phrase_time_limit=self.record_timeout,
54-
)
34+
def _save_audio_callback(self, _recognizer, audio):
35+
self.audio = audio
36+
37+
def _start_listening(self):
38+
self.listener_handle = self.recognizer.listen_in_background(self.microphone, self._save_audio_callback)
5539

5640
def stop_listening(self, wait_for_stop=False):
5741
if self.listener_handle:
@@ -61,40 +45,24 @@ def stop_listening(self, wait_for_stop=False):
6145
def is_listening(self):
6246
return self.listener_handle is not None
6347

64-
def record_callback(self, _, audio: sr.AudioData) -> None:
65-
# Grab the raw bytes and push it into the thread safe queue.
66-
data = audio.get_raw_data()
67-
self.data_queue.put(data)
68-
6948
def speech_waiting(self):
70-
return not self.data_queue.empty()
71-
72-
def get_speech(self):
73-
if self.speech_waiting():
74-
return self.data_queue.get()
75-
return None
76-
77-
def get_audio_data(self):
78-
now = datetime.utcnow()
79-
if self.speech_waiting():
80-
self.phrase_complete = False
81-
if self.phrase_time and now - self.phrase_time > timedelta(
82-
seconds=self.phrase_timeout
83-
):
84-
self.last_sample = bytes()
85-
self.phrase_complete = True
86-
self.phrase_time = now
87-
88-
# Concatenate our current audio data with the latest audio data.
89-
while self.speech_waiting():
90-
data = self.get_speech()
91-
self.last_sample += data
49+
return self.audio is not None
9250

93-
# Use AudioData to convert the raw data to wav data.
94-
with sr.Microphone() as source:
95-
audio_data = sr.AudioData(
96-
self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
97-
)
98-
return audio_data
51+
def recognize(self):
52+
if self.audio:
53+
# Transcribe the audio data to text using Whisper
54+
print("Recognizing...")
55+
attempts = 0
56+
while attempts < 3:
57+
try:
58+
result = self.recognizer.recognize_whisper_api(
59+
self.audio, api_key=self.api_key
60+
)
9961

100-
return None
62+
return result.strip()
63+
except sr.RequestError as e:
64+
time.sleep(3)
65+
attempts += 1
66+
print("I wasn't able to understand you. Please repeat that.")
67+
return None
68+
return None

Magic_AI_Storybook/story.py

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
22
#
33
# SPDX-License-Identifier: MIT
4+
# Desktop Icon from <a href="https://www.flaticon.com/free-icons/book" title="book icons">Book icons created by Freepik - Flaticon</a>
45

56
import threading
67
import sys
@@ -26,7 +27,7 @@
2627
STORY_WORD_LENGTH = 800
2728
REED_SWITCH_PIN = board.D17
2829
NEOPIXEL_PIN = board.D18
29-
API_KEYS_FILE = "/home/pi/keys.txt"
30+
API_KEYS_FILE = "~/keys.txt"
3031
PROMPT_FILE = "/boot/bookprompt.txt"
3132

3233
# Neopixel Settings
@@ -47,8 +48,11 @@
4748
BUTTON_NEW_IMAGE = "button_new.png"
4849

4950
# Asset Paths
50-
IMAGES_PATH = os.path.dirname(sys.argv[0]) + "images/"
51-
FONTS_PATH = os.path.dirname(sys.argv[0]) + "fonts/"
51+
BASE_PATH = os.path.dirname(sys.argv[0])
52+
if BASE_PATH != "":
53+
BASE_PATH += "/"
54+
IMAGES_PATH = BASE_PATH + "images/"
55+
FONTS_PATH = BASE_PATH + "fonts/"
5256

5357
# Font Path, Size
5458
TITLE_FONT = (FONTS_PATH + "Desdemona Black Regular.otf", 48)
@@ -58,7 +62,7 @@
5862

5963
# Delays to control the speed of the text
6064
WORD_DELAY = 0.1
61-
WELCOME_IMAGE_DELAY = 3
65+
WELCOME_IMAGE_DELAY = 0
6266
TITLE_FADE_TIME = 0.05
6367
TITLE_FADE_STEPS = 25
6468
TEXT_FADE_TIME = 0.25
@@ -84,6 +88,12 @@
8488

8589
# Do some checks and Import API keys from API_KEYS_FILE
8690
config = configparser.ConfigParser()
91+
92+
username = os.environ["SUDO_USER"]
93+
user_homedir = os.path.expanduser(f"~{username}")
94+
API_KEYS_FILE = API_KEYS_FILE.replace("~", user_homedir)
95+
96+
print(os.path.expanduser(API_KEYS_FILE))
8797
config.read(os.path.expanduser(API_KEYS_FILE))
8898
if not config.has_section("openai"):
8999
print("Please make sure API_KEYS_FILE points to a valid file.")
@@ -186,7 +196,7 @@ def __init__(self, rotation=0):
186196
self._busy = False
187197
# Use a cursor to keep track of where we are in the text area
188198
self.cursor = {"x": 0, "y": 0}
189-
self.listener = Listener(ENERGY_THRESHOLD, PHRASE_TIMEOUT, RECORD_TIMEOUT)
199+
self.listener = None
190200
self.backlight = Backlight()
191201
self.pixels = neopixel.NeoPixel(
192202
NEOPIXEL_PIN,
@@ -202,7 +212,7 @@ def __init__(self, rotation=0):
202212

203213
def start(self):
204214
# Output to the LCD instead of the console
205-
os.putenv("DISPLAY", ":0")
215+
#os.putenv("DISPLAY", ":0")
206216

207217
# Initialize the display
208218
pygame.init()
@@ -217,6 +227,9 @@ def start(self):
217227
self.display_welcome()
218228
start_time = time.monotonic()
219229

230+
#Initialize the Listener
231+
self.listener = Listener(openai.api_key, ENERGY_THRESHOLD, RECORD_TIMEOUT)
232+
220233
# Preload remaining images
221234
self._load_image("background", BACKGROUND_IMAGE)
222235
self._load_image("loading", LOADING_IMAGE)
@@ -585,9 +598,7 @@ def show_waiting():
585598
# No response from user, so return
586599
return
587600

588-
audio_data = self.listener.get_audio_data()
589-
590-
story_request = self._transcribe(audio_data.get_wav_data())
601+
story_request = self.listener.recognize()
591602

592603
story_prompt = self._make_story_prompt(story_request)
593604
self.display_loading()
@@ -636,23 +647,6 @@ def _make_story_prompt(self, request):
636647
STORY_WORD_LENGTH=STORY_WORD_LENGTH, STORY_REQUEST=request
637648
)
638649

639-
@staticmethod
640-
def _transcribe(wav_data):
641-
# Transcribe the audio data to text using Whisper
642-
print("Transcribing...")
643-
attempts = 0
644-
while attempts < 3:
645-
try:
646-
with NamedTemporaryFile(suffix=".wav") as temp_file:
647-
result = openai.Audio.translate_raw(
648-
WHISPER_MODEL, wav_data, temp_file.name
649-
)
650-
return result["text"].strip()
651-
except (openai.error.ServiceUnavailableError, openai.error.APIError):
652-
time.sleep(3)
653-
attempts += 1
654-
return "I wasn't able to understand you. Please repeat that."
655-
656650
def _sendchat(self, prompt):
657651
response = ""
658652
print("Sending to chatGPT")

Magic_AI_Storybook/storybook.desktop

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[Desktop Entry]
2+
Comment=Run Magic Storybook
3+
Terminal=true
4+
Name=Magic Storybook
5+
Exec=sudo python -E /home/pi/Magic_AI_Storybook/story.py
6+
Type=Application
7+
Icon=/home/pi/Magic_AI_Storybook/images/magic_book_icon.png

0 commit comments

Comments
 (0)