 import argparse
 import os
-import sys

 from datetime import datetime, timedelta
 from queue import Queue
-from time import sleep
+import time
+import random
 from tempfile import NamedTemporaryFile

 import speech_recognition as sr
 import openai

-# Add your OpenAI API key here
-openai.api_key = "sk-..."
+import board
+import digitalio
+from adafruit_motorkit import MotorKit
+
+openai.api_key = "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"
 SYSTEM_ROLE = (
     "You are a helpful voice assistant that answers questions and gives information"
 )
-
-def speak(text):
-    subprocess.run(["espeak-ng", text, "&"], check=False)
-
+CHATGPT_MODEL = "gpt-3.5-turbo"
+WHISPER_MODEL = "whisper-1"
+ARM_MOVEMENT_TIME = 0.5
+BASE_MOUTH_DURATION = 0.2  # A higher number means slower mouth movement
+SPEECH_VARIANCE = 0.03  # A higher number means more variance in the mouth movement
+RECORD_TIMEOUT = 30
+
+# Setup Motors
+kit = MotorKit(i2c=board.I2C())
+arms_motor = kit.motor1
+mouth_motor = kit.motor2
+
+# Setup Foot Button
+foot_button = digitalio.DigitalInOut(board.D16)
+foot_button.direction = digitalio.Direction.INPUT
+foot_button.pull = digitalio.Pull.UP

 def sendchat(prompt):
     completion = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
+        model=CHATGPT_MODEL,
         messages=[
             {"role": "system", "content": SYSTEM_ROLE},
             {"role": "user", "content": prompt},
@@ -37,84 +52,80 @@ def sendchat(prompt):
     # Send the heard text to ChatGPT and return the result
     return completion.choices[0].message.content

+def move_arms_motor(dir_up=True, speed=1.0):
+    direction = 1 if dir_up else -1
+    arms_motor.throttle = speed * direction
+    time.sleep(ARM_MOVEMENT_TIME)
+    arms_motor.throttle = 0
+
+def move_mouth_motor(dir_open=True, duration=0.5, speed=1.0):
+    direction = 1 if dir_open else -1
+    mouth_motor.throttle = speed * direction
+    time.sleep(duration)
+    mouth_motor.throttle = 0
+
+def move_mouth():
+    move_mouth_motor(dir_open=True, duration=random_mouth_duration())
+    move_mouth_motor(dir_open=False, duration=random_mouth_duration())
+
+def random_mouth_duration():
+    return BASE_MOUTH_DURATION + random.random() * SPEECH_VARIANCE - (SPEECH_VARIANCE / 2)
+
+def move_arms(hide=True):
+    move_arms_motor(dir_up=not hide)
+
+def speak(text):
+    # while the subprocess is still running, move the mouth
+    with subprocess.Popen(["espeak-ng", text, "&"]) as proc:
+        while proc.poll() is None:
+            move_mouth()

 def transcribe(wav_data):
     # Read the transcription.
     print("Transcribing...")
-    with NamedTemporaryFile(suffix=".wav") as temp_file:
-        result = openai.Audio.translate_raw("whisper-1", wav_data, temp_file.name)
-        return result["text"].strip()
-
+    speak("Let me think about that")
+    move_arms(hide=True)
+    attempts = 0
+    while attempts < 3:
+        try:
+            with NamedTemporaryFile(suffix=".wav") as temp_file:
+                result = openai.Audio.translate_raw(WHISPER_MODEL, wav_data, temp_file.name)
+                return result["text"].strip()
+        except (
+            openai.error.ServiceUnavailableError,
+            openai.error.APIError
+        ):
+            time.sleep(3)
+            attempts += 1
+    return "I wasn't able to understand you. Please repeat that."

 class Listener:
     def __init__(
-        self, default_microphone, record_timeout, energy_threshold, phrase_timeout
+        self, energy_threshold, phrase_timeout
     ):
         self.listener_handle = None
-        self.recorder = sr.Recognizer()
-        self.record_timeout = record_timeout
-        self.recorder.energy_threshold = energy_threshold
-        self.recorder.dynamic_energy_threshold = False
-        self.recorder.pause_threshold = 1
-        self.source = None
+        self.recognizer = sr.Recognizer()
+        self.recognizer.energy_threshold = energy_threshold
+        self.recognizer.dynamic_energy_threshold = False
+        self.recognizer.pause_threshold = 1
         self.last_sample = bytes()
         self.phrase_time = datetime.utcnow()
         self.phrase_timeout = phrase_timeout
         self.phrase_complete = False
-        self.default_microphone = default_microphone
         # Thread safe Queue for passing data from the threaded recording callback.
         self.data_queue = Queue()
-        self.source = self._get_microphone()
-
-    def _get_microphone(self):
-        if self.source:
-            return self.source
-        mic_name = self.default_microphone
-        source = None
-        if not mic_name or mic_name == "list":
-            print("Available microphone devices are: ")
-            for index, name in enumerate(sr.Microphone.list_microphone_names()):
-                print(f'Microphone with name "{name}" found')
-            sys.exit()
-        else:
-            for index, name in enumerate(sr.Microphone.list_microphone_names()):
-                if mic_name in name:
-                    print(f'Microphone with name "{name}" at index "{index}" found')
-                    source = sr.Microphone(sample_rate=16000, device_index=index)
-                    break
-            if not source:
-                print(f'Microphone with name "{mic_name}" not found')
-                sys.exit()
-
-        with source:
-            self.recorder.adjust_for_ambient_noise(source)
-
-        return source
+        self.mic_dev_index = None

     def listen(self):
         if not self.listener_handle:
-            with self._get_microphone() as source:
-                audio = self.recorder.listen(source)
+            with sr.Microphone() as source:
+                print(source.stream)
+                self.recognizer.adjust_for_ambient_noise(source)
+                audio = self.recognizer.listen(source, timeout=RECORD_TIMEOUT)
             data = audio.get_raw_data()
             self.data_queue.put(data)

-    def start(self):
-        if not self.listener_handle:
-            self.listener_handle = self.recorder.listen_in_background(
-                self._get_microphone(),
-                self.record_callback,
-                phrase_time_limit=self.record_timeout,
-            )
-
-    def stop(self, wait_for_stop: bool = False):
-        self.listener_handle(wait_for_stop=wait_for_stop)
-        self.listener_handle = None
-
     def record_callback(self, _, audio: sr.AudioData) -> None:
-        """
-        Threaded callback function to recieve audio data when recordings finish.
-        audio: An AudioData containing the recorded bytes.
-        """
         # Grab the raw bytes and push it into the thread safe queue.
         data = audio.get_raw_data()
         self.data_queue.put(data)
@@ -143,14 +154,14 @@ def get_audio_data(self):
             data = self.get_speech()
             self.last_sample += data

-            source = self._get_microphone()
-
             # Use AudioData to convert the raw data to wav data.
-            return sr.AudioData(
-                self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
-            )
-        return None
+            with sr.Microphone() as source:
+                audio_data = sr.AudioData(
+                    self.last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH
+                )
+            return audio_data

+        return None

 def main():
     parser = argparse.ArgumentParser()
@@ -160,44 +171,32 @@ def main():
         help="Energy level for mic to detect.",
         type=int,
     )
-    parser.add_argument(
-        "--record_timeout",
-        default=2,
-        help="How real time the recording is in seconds.",
-        type=float,
-    )
     parser.add_argument(
         "--phrase_timeout",
         default=3,
         help="How much empty space between recordings before we "
         "consider it a new line in the transcription.",
         type=float,
     )
-    parser.add_argument(
-        "--default_microphone",
-        default="pulse",
-        help="Default microphone name for SpeechRecognition. "
-        "Run this with 'list' to view available Microphones.",
-        type=str,
-    )
+
     args = parser.parse_args()

     listener = Listener(
-        args.default_microphone,
-        args.record_timeout,
         args.energy_threshold,
         args.phrase_timeout,
     )

     transcription = [""]

-    print("How may I help you?")
-    speak("How may I help you?")
-
     while True:
         try:
+            # If button is pressed, start listening
+            if not foot_button.value:
+                print("How may I help you?")
+                speak("How may I help you?")
+                listener.listen()
+
             # Pull raw recorded audio from the queue.
-            listener.listen()
             if listener.speech_waiting():
                 audio_data = listener.get_audio_data()
                 text = transcribe(audio_data.get_wav_data())
@@ -209,6 +208,7 @@ def main():
                         chat_response = sendchat(text)
                         transcription.append(f"> {chat_response}")
                         print("Got response from ChatGPT. Beginning speech synthesis.")
+                        move_arms(hide=False)
                         speak(chat_response)
                         print("Done speaking.")
                     else:
@@ -219,12 +219,10 @@ def main():
                 for line in transcription:
                     print(line)
                 print("", end="", flush=True)
-                sleep(0.25)
-        except (AssertionError, AttributeError):
-            pass
+                time.sleep(0.25)
         except KeyboardInterrupt:
             break
-
+    move_arms(hide=False)
     print("\n\nTranscription:")
     for line in transcription:
         print(line)