Skip to content

Commit de54ad5

Browse files
committed
Updated and simplified for new hw config
1 parent 81b2f97 commit de54ad5

File tree

1 file changed

+92
-94
lines changed

1 file changed

+92
-94
lines changed

ChatGPT_Voice_Assistant/assistant.py

Lines changed: 92 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,44 @@
66

77
import argparse
import os
import random
import subprocess
import time
from datetime import datetime, timedelta
from queue import Queue
from tempfile import NamedTemporaryFile

import openai
import speech_recognition as sr

19-
import board
import digitalio
from adafruit_motorkit import MotorKit

# SECURITY: a previous revision hard-coded an OpenAI API key here. Any key
# committed to source control is compromised and must be rotated. Read the
# key from the environment instead; openai will raise a clear auth error at
# call time if it is missing.
openai.api_key = os.environ.get("OPENAI_API_KEY")

SYSTEM_ROLE = (
    "You are a helpful voice assistant that answers questions and gives information"
)
CHATGPT_MODEL = "gpt-3.5-turbo"
WHISPER_MODEL = "whisper-1"
ARM_MOVEMENT_TIME = 0.5
BASE_MOUTH_DURATION = 0.2  # A higher number means slower mouth movement
SPEECH_VARIANCE = 0.03  # A higher number means more variance in the mouth movement
RECORD_TIMEOUT = 30

# Setup Motors
kit = MotorKit(i2c=board.I2C())
arms_motor = kit.motor1
mouth_motor = kit.motor2

# Setup Foot Button (active-low: pulled up, pressed reads False)
foot_button = digitalio.DigitalInOut(board.D16)
foot_button.direction = digitalio.Direction.INPUT
foot_button.pull = digitalio.Pull.UP
2843

2944
def sendchat(prompt):
3045
completion = openai.ChatCompletion.create(
31-
model="gpt-3.5-turbo",
46+
model=CHATGPT_MODEL,
3247
messages=[
3348
{"role": "system", "content": SYSTEM_ROLE},
3449
{"role": "user", "content": prompt},
@@ -37,84 +52,80 @@ def sendchat(prompt):
3752
# Send the heard text to ChatGPT and return the result
3853
return completion.choices[0].message.content
3954

55+
def move_arms_motor(dir_up=True, speed=1.0):
    """Pulse the arm motor for ARM_MOVEMENT_TIME seconds, then stop.

    A positive throttle raises the arms; a negative one lowers them.
    """
    arms_motor.throttle = speed if dir_up else -speed
    time.sleep(ARM_MOVEMENT_TIME)
    arms_motor.throttle = 0
60+
61+
def move_mouth_motor(dir_open=True, duration=0.5, speed=1.0):
    """Drive the mouth motor for *duration* seconds, then stop.

    Positive throttle opens the mouth, negative closes it.
    """
    mouth_motor.throttle = speed if dir_open else -speed
    time.sleep(duration)
    mouth_motor.throttle = 0
66+
67+
def move_mouth():
    """Perform one chatter cycle: open then close the mouth, each for a
    slightly randomized duration so the movement looks natural."""
    for opening in (True, False):
        move_mouth_motor(dir_open=opening, duration=random_mouth_duration())
70+
71+
def random_mouth_duration():
    """Return BASE_MOUTH_DURATION jittered uniformly by +/- SPEECH_VARIANCE / 2."""
    jitter = random.random() * SPEECH_VARIANCE - (SPEECH_VARIANCE / 2)
    return BASE_MOUTH_DURATION + jitter
73+
74+
def move_arms(hide=True):
    """Lower the arms when *hide* is True, raise them otherwise."""
    move_arms_motor(dir_up=not hide)
76+
77+
def speak(text):
    """Speak *text* aloud via espeak-ng, flapping the mouth while it runs.

    Fix: the previous argument list included a literal "&". With subprocess
    (no shell involved) "&" is not a background operator -- it was passed to
    espeak-ng as extra text to speak. Popen already runs the process
    asynchronously, so the token is simply dropped.
    """
    with subprocess.Popen(["espeak-ng", text]) as proc:
        # Keep the mouth moving for as long as the synthesizer is alive.
        while proc.poll() is None:
            move_mouth()
4082

4183
def transcribe(wav_data):
    """Send recorded WAV bytes to the Whisper API and return the text.

    Announces that it is thinking, hides the arms, then retries up to three
    times on transient API errors (3 s back-off between attempts). If all
    attempts fail, returns a canned apology string instead of raising.
    """
    print("Transcribing...")
    speak("Let me think about that")
    move_arms(hide=True)
    # NOTE(review): Audio.translate_raw translates speech into English rather
    # than transcribing it verbatim -- confirm that is intended here.
    for _ in range(3):
        try:
            with NamedTemporaryFile(suffix=".wav") as temp_file:
                result = openai.Audio.translate_raw(WHISPER_MODEL, wav_data, temp_file.name)
                return result["text"].strip()
        except (openai.error.ServiceUnavailableError, openai.error.APIError):
            time.sleep(3)  # brief back-off before the next attempt
    return "I wasn't able to understand you. Please repeat that."
48101

49102
class Listener:
50103
def __init__(
    self, energy_threshold, phrase_timeout
):
    """Initialize the Listener (method of the Listener class).

    energy_threshold: mic energy level above which audio counts as speech.
    phrase_timeout: seconds of silence treated as the end of a phrase.
    """
    self.listener_handle = None
    # Recognizer with a fixed (non-adaptive) energy threshold and a 1 s
    # pause threshold for detecting the end of an utterance.
    self.recognizer = sr.Recognizer()
    self.recognizer.energy_threshold = energy_threshold
    self.recognizer.dynamic_energy_threshold = False
    self.recognizer.pause_threshold = 1
    # Rolling state for assembling phrases from raw audio chunks.
    self.phrase_time = datetime.utcnow()
    self.phrase_timeout = phrase_timeout
    self.phrase_complete = False
    self.last_sample = b""
    # Thread safe Queue for passing data from the threaded recording callback.
    self.data_queue = Queue()
    self.mic_dev_index = None
93118

94119
def listen(self):
    """Record one utterance from the default microphone and queue its bytes.

    Does nothing when a background listener handle is already active.
    Blocks for up to RECORD_TIMEOUT seconds waiting for speech.

    Fix: removed the leftover debug `print(source.stream)` that polluted
    stdout on every recording.
    """
    if self.listener_handle:
        return
    with sr.Microphone() as source:
        self.recognizer.adjust_for_ambient_noise(source)
        audio = self.recognizer.listen(source, timeout=RECORD_TIMEOUT)
    data = audio.get_raw_data()
    self.data_queue.put(data)
100127

101-
def start(self):
102-
if not self.listener_handle:
103-
self.listener_handle = self.recorder.listen_in_background(
104-
self._get_microphone(),
105-
self.record_callback,
106-
phrase_time_limit=self.record_timeout,
107-
)
108-
109-
def stop(self, wait_for_stop: bool = False):
110-
self.listener_handle(wait_for_stop=wait_for_stop)
111-
self.listener_handle = None
112-
113128
def record_callback(self, _, audio: sr.AudioData) -> None:
    """Threaded callback invoked when a background recording finishes.

    Pushes the recording's raw bytes onto the thread-safe queue; the unused
    first parameter is the recognizer instance supplied by the library.
    """
    # Grab the raw bytes and push it into the thread safe queue.
    self.data_queue.put(audio.get_raw_data())
@@ -143,14 +154,14 @@ def get_audio_data(self):
143154
data = self.get_speech()
144155
self.last_sample += data
145156

146-
source = self._get_microphone()
147-
148157
# Use AudioData to convert the raw data to wav data.
149-
return sr.AudioData(
150-
self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
151-
)
152-
return None
158+
with sr.Microphone() as source:
159+
audio_data = sr.AudioData(
160+
self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
161+
)
162+
return audio_data
153163

164+
return None
154165

155166
def main():
156167
parser = argparse.ArgumentParser()
@@ -160,44 +171,32 @@ def main():
160171
help="Energy level for mic to detect.",
161172
type=int,
162173
)
163-
parser.add_argument(
164-
"--record_timeout",
165-
default=2,
166-
help="How real time the recording is in seconds.",
167-
type=float,
168-
)
169174
parser.add_argument(
170175
"--phrase_timeout",
171176
default=3,
172177
help="How much empty space between recordings before we "
173178
"consider it a new line in the transcription.",
174179
type=float,
175180
)
176-
parser.add_argument(
177-
"--default_microphone",
178-
default="pulse",
179-
help="Default microphone name for SpeechRecognition. "
180-
"Run this with 'list' to view available Microphones.",
181-
type=str,
182-
)
181+
183182
args = parser.parse_args()
184183

185184
listener = Listener(
186-
args.default_microphone,
187-
args.record_timeout,
188185
args.energy_threshold,
189186
args.phrase_timeout,
190187
)
191188

192189
transcription = [""]
193190

194-
print("How may I help you?")
195-
speak("How may I help you?")
196-
197191
while True:
198192
try:
193+
# If button is pressed, start listening
194+
if not foot_button.value:
195+
print("How may I help you?")
196+
speak("How may I help you?")
197+
listener.listen()
198+
199199
# Pull raw recorded audio from the queue.
200-
listener.listen()
201200
if listener.speech_waiting():
202201
audio_data = listener.get_audio_data()
203202
text = transcribe(audio_data.get_wav_data())
@@ -209,6 +208,7 @@ def main():
209208
chat_response = sendchat(text)
210209
transcription.append(f"> {chat_response}")
211210
print("Got response from ChatGPT. Beginning speech synthesis.")
211+
move_arms(hide=False)
212212
speak(chat_response)
213213
print("Done speaking.")
214214
else:
@@ -219,12 +219,10 @@ def main():
219219
for line in transcription:
220220
print(line)
221221
print("", end="", flush=True)
222-
sleep(0.25)
223-
except (AssertionError, AttributeError):
224-
pass
222+
time.sleep(0.25)
225223
except KeyboardInterrupt:
226224
break
227-
225+
move_arms(hide=False)
228226
print("\n\nTranscription:")
229227
for line in transcription:
230228
print(line)

0 commit comments

Comments
 (0)