Skip to content

Commit ba5799f

Browse files
authored
Merge pull request #2467 from makermelissa/main
ChatGPT Bear Updated Code and Case Design Files
2 parents 3413378 + f14c120 commit ba5799f

File tree

10 files changed

+271
-234
lines changed

10 files changed

+271
-234
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
40.3 KB
Binary file not shown.
157 KB
Binary file not shown.

ChatGPT_Bear/Enclosure/Case Top.3mf

124 KB
Binary file not shown.

ChatGPT_Bear/Enclosure/Case Top.stl

486 KB
Binary file not shown.

ChatGPT_Bear/assistant.py

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
# SPDX-FileCopyrightText: 2023 Melissa LeBlanc-Williams for Adafruit Industries
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
import threading
6+
import os
7+
8+
from datetime import datetime, timedelta
9+
from queue import Queue
10+
import time
11+
import random
12+
from tempfile import NamedTemporaryFile
13+
14+
import azure.cognitiveservices.speech as speechsdk
15+
import speech_recognition as sr
16+
import openai
17+
18+
import board
19+
import digitalio
20+
from adafruit_motorkit import MotorKit
21+
22+
# ChatGPT Parameters
# System prompt that frames every conversation sent to the chat model.
SYSTEM_ROLE = (
    "You are a helpful voice assistant in the form of a talking teddy bear"
    " that answers questions and gives information"
)
CHATGPT_MODEL = "gpt-3.5-turbo"
WHISPER_MODEL = "whisper-1"

# Azure Parameters
# Voice used for text-to-speech synthesis.
AZURE_SPEECH_VOICE = "en-GB-OliverNeural"

# Speech Recognition Parameters
ENERGY_THRESHOLD = 1000  # Energy level for mic to detect
PHRASE_TIMEOUT = 3.0  # Space between recordings for separating phrases
RECORD_TIMEOUT = 30  # Max seconds a single listen() call waits for speech

# Motor Parameters
ARM_MOVEMENT_TIME = 0.5
BASE_MOUTH_DURATION = 0.2  # A higher number means slower mouth movement
SPEECH_VARIANCE = 0.1  # Higher allows more mouth movement variance.
# It pauses for BASE_MOUTH_DURATION ± SPEECH_VARIANCE
MOTOR_DUTY_CYCLE = 1.0  # Lower provides less power to the motors

# Import keys from environment variables
# NOTE(review): no error is raised here if a variable is unset — the SDK
# calls below will fail later with a less obvious message.
openai.api_key = os.environ.get("OPENAI_API_KEY")
speech_key = os.environ.get("SPEECH_KEY")
service_region = os.environ.get("SPEECH_REGION")

# Shared Azure speech configuration used by Bear's synthesizer.
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_synthesis_voice_name = AZURE_SPEECH_VOICE
54+
def sendchat(prompt):
    """Send the heard text to ChatGPT and return the assistant's reply.

    The conversation is stateless: each call sends only the system role
    plus the single user prompt.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_ROLE},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model=CHATGPT_MODEL,
        messages=conversation,
    )
    return completion.choices[0].message.content
64+
65+
66+
def transcribe(wav_data):
    """Send recorded WAV bytes to Whisper and return the recognized text.

    Retries up to three times on transient OpenAI service errors; if all
    attempts fail, returns a fixed "please repeat" fallback phrase.
    """
    print("Transcribing...")
    for _ in range(3):
        try:
            # The temp file only supplies a ".wav" filename hint to the API.
            with NamedTemporaryFile(suffix=".wav") as temp_file:
                result = openai.Audio.translate_raw(
                    WHISPER_MODEL, wav_data, temp_file.name
                )
                return result["text"].strip()
        except (openai.error.ServiceUnavailableError, openai.error.APIError):
            time.sleep(3)
    return "I wasn't able to understand you. Please repeat that."
81+
82+
83+
class Listener:
    """Captures microphone audio and buffers it into phrases.

    Audio bytes flow through a thread-safe queue; get_audio_data() stitches
    queued chunks into a single phrase, starting a new phrase whenever more
    than `phrase_timeout` seconds have elapsed since the previous chunk.
    """

    def __init__(self):
        # NOTE(review): listener_handle is never assigned anywhere else, so
        # the guard in listen() always passes — confirm whether background
        # listening was planned and this is leftover scaffolding.
        self.listener_handle = None
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = ENERGY_THRESHOLD
        # Fixed threshold: dynamic adjustment would drift the trigger level.
        self.recognizer.dynamic_energy_threshold = False
        self.recognizer.pause_threshold = 1
        # Raw audio bytes accumulated for the current phrase.
        self.last_sample = bytes()
        # Timestamp of the most recent audio chunk (naive UTC).
        self.phrase_time = datetime.utcnow()
        self.phrase_timeout = PHRASE_TIMEOUT
        self.phrase_complete = False
        # Thread safe Queue for passing data from the threaded recording callback.
        self.data_queue = Queue()
        # NOTE(review): mic_dev_index is set but never read in this file.
        self.mic_dev_index = None

    def listen(self):
        """Block on the microphone for one utterance and queue its raw bytes."""
        if not self.listener_handle:
            with sr.Microphone() as source:
                print(source.stream)
                # Calibrate the energy threshold against current room noise.
                self.recognizer.adjust_for_ambient_noise(source)
                audio = self.recognizer.listen(source, timeout=RECORD_TIMEOUT)
            data = audio.get_raw_data()
            self.data_queue.put(data)

    def record_callback(self, _, audio: sr.AudioData) -> None:
        """Background-listening callback: push raw bytes onto the queue."""
        # Grab the raw bytes and push it into the thread safe queue.
        data = audio.get_raw_data()
        self.data_queue.put(data)

    def speech_waiting(self):
        """Return True if any recorded audio is waiting in the queue."""
        return not self.data_queue.empty()

    def get_speech(self):
        """Pop one chunk of raw audio bytes, or None if the queue is empty."""
        if self.speech_waiting():
            return self.data_queue.get()
        return None

    def get_audio_data(self):
        """Drain the queue into an sr.AudioData phrase, or return None.

        Sets self.phrase_complete when the gap since the previous chunk
        exceeded phrase_timeout (the buffer is then restarted).
        """
        now = datetime.utcnow()
        if self.speech_waiting():
            self.phrase_complete = False
            if self.phrase_time and now - self.phrase_time > timedelta(
                seconds=self.phrase_timeout
            ):
                # Gap was long enough: treat buffered audio as a new phrase.
                self.last_sample = bytes()
                self.phrase_complete = True
            self.phrase_time = now

            # Concatenate our current audio data with the latest audio data.
            while self.speech_waiting():
                data = self.get_speech()
                self.last_sample += data

            # Use AudioData to convert the raw data to wav data.
            # NOTE(review): the microphone is opened here only to read
            # SAMPLE_RATE / SAMPLE_WIDTH — consider caching these instead
            # of re-acquiring the device.
            with sr.Microphone() as source:
                audio_data = sr.AudioData(
                    self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
                )
                return audio_data

        return None
144+
145+
146+
class Bear:
    """Drives the animatronic bear: arm/mouth motors, foot button, and speech.

    A daemon thread flaps the mouth whenever Azure reports that speech
    synthesis is in progress (via the synthesizing / synthesis_completed
    events).
    """

    def __init__(self, azure_speech_config):
        kit = MotorKit(i2c=board.I2C())
        self._arms_motor = kit.motor1
        self._mouth_motor = kit.motor2

        # Setup Foot Button (pressed pulls the pin low)
        self._foot_button = digitalio.DigitalInOut(board.D16)
        self._foot_button.direction = digitalio.Direction.INPUT
        self._foot_button.pull = digitalio.Pull.UP

        self.do_mouth_movement = False
        # Flag polled by the mouth thread so deinit() can stop it cleanly.
        self._mouth_thread_running = True
        self._mouth_thread = threading.Thread(target=self.move_mouth, daemon=True)
        self._mouth_thread.start()

        self._speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=azure_speech_config
        )
        self._speech_synthesizer.synthesizing.connect(self.start_moving_mouth)
        self._speech_synthesizer.synthesis_completed.connect(self.stop_moving_mouth)

    def start_moving_mouth(self, _event):
        """Azure event handler: speech audio is streaming, start the mouth."""
        self.do_mouth_movement = True

    def stop_moving_mouth(self, _event):
        """Azure event handler: synthesis finished, stop the mouth."""
        self.do_mouth_movement = False

    def deinit(self):
        """Stop the mouth thread, de-power motors, and detach event handlers."""
        self.do_mouth_movement = False
        # Bug fix: the thread loop previously ran `while True`, so join()
        # blocked forever. Clearing the flag lets the loop exit.
        self._mouth_thread_running = False
        self._mouth_thread.join()
        self._arms_motor.throttle = None
        self._mouth_motor.throttle = None
        # Bug fix: __init__ connects the `synthesizing` event (not
        # `synthesis_started`), so disconnect that same event here.
        self._speech_synthesizer.synthesizing.disconnect_all()
        self._speech_synthesizer.synthesis_completed.disconnect_all()

    def _move_arms_motor(self, dir_up=True):
        """Pulse the arm motor briefly in the requested direction."""
        direction = -1 if dir_up else 1
        self._arms_motor.throttle = MOTOR_DUTY_CYCLE * direction
        time.sleep(ARM_MOVEMENT_TIME)
        # Remove Power from the motor to avoid overheating
        self._arms_motor.throttle = None

    def _move_mouth_motor(self, dir_open=True):
        """Open (powered) or close (spring-return) the mouth for a random beat."""
        # Randomize the hold time so the flapping looks less mechanical:
        # BASE_MOUTH_DURATION ± SPEECH_VARIANCE / 2
        duration = (
            BASE_MOUTH_DURATION
            + random.random() * SPEECH_VARIANCE
            - (SPEECH_VARIANCE / 2)
        )
        # Only power the motor while opening and let the spring close it
        self._mouth_motor.throttle = MOTOR_DUTY_CYCLE if dir_open else None
        time.sleep(duration)
        # Remove Power from the motor and let close to avoid overheating
        self._mouth_motor.throttle = None

    def foot_pressed(self):
        """Return True while the foot button is held (active-low input)."""
        return not self._foot_button.value

    def move_mouth(self):
        """Thread body: flap the mouth while do_mouth_movement is set."""
        print("Starting mouth movement thread")
        while self._mouth_thread_running:
            if self.do_mouth_movement:
                self._move_mouth_motor(dir_open=True)
                self._move_mouth_motor(dir_open=False)
            else:
                # Avoid a busy-wait pegging a CPU core while idle.
                time.sleep(0.05)

    def move_arms(self, hide=True):
        """Raise the arms over the eyes (hide=True) or lower them."""
        self._move_arms_motor(dir_up=hide)

    def speak(self, text):
        """Synthesize `text` through Azure TTS and log the outcome."""
        result = self._speech_synthesizer.speak_text_async(text).get()

        # Check result
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print("Speech synthesized for text [{}]".format(text))
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech synthesis canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
224+
225+
226+
def main():
    """Main loop: wait for the foot button, record, transcribe, and reply aloud."""
    listener = Listener()
    bear = Bear(speech_config)

    transcription = [""]

    while True:
        try:
            # If button is pressed, start listening
            if bear.foot_pressed():
                print("How may I help you?")
                bear.speak("How may I help you?")
                listener.listen()

            # Pull raw recorded audio from the queue.
            if listener.speech_waiting():
                recorded_audio = listener.get_audio_data()
                bear.speak("Let me think about that")
                bear.move_arms(hide=True)
                heard_text = transcribe(recorded_audio.get_wav_data())

                if heard_text:
                    if listener.phrase_complete:
                        transcription.append(heard_text)
                        print(f"Phrase Complete. Sent '{heard_text}' to ChatGPT.")
                        reply = sendchat(heard_text)
                        transcription.append(f"> {reply}")
                        print("Got response from ChatGPT. Beginning speech synthesis.")
                        bear.move_arms(hide=False)
                        bear.speak(reply)
                    else:
                        print("Partial Phrase...")
                        transcription[-1] = heard_text

                # Redraw the running transcript from scratch.
                os.system("clear")
                print("\n".join(transcription), flush=True)
                time.sleep(0.25)
        except KeyboardInterrupt:
            break
    bear.deinit()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)