2
2
#
3
3
# SPDX-License-Identifier: MIT
4
4
5
- from datetime import datetime , timedelta
6
- from queue import Queue
5
+ import time
7
6
8
7
import speech_recognition as sr
9
8
10
9
11
10
class Listener :
12
def __init__(self, api_key, energy_threshold=300, record_timeout=30):
    """Create the microphone and recognizer and calibrate for ambient noise.

    Args:
        api_key: Key later passed to the Whisper API when transcribing.
        energy_threshold: Initial energy threshold assigned to the recognizer
            before ambient-noise calibration runs.
        record_timeout: Stored on the instance as ``record_timeout``.
    """
    self.api_key = api_key
    self.record_timeout = record_timeout
    # Handle of the background listener; None while nothing is listening.
    self.listener_handle = None
    # Captured audio; None until something has been stored.
    self.audio = None
    self.microphone = sr.Microphone()
    self.recognizer = sr.Recognizer()
    self.recognizer.energy_threshold = energy_threshold
    with self.microphone as source:
        # we only need to calibrate once, before we start listening
        self.recognizer.adjust_for_ambient_noise(source)
26
24
27
25
def listen(self, ready_callback=None):
    """Start listening, wait until audio has been stored, then stop.

    The wait loop ends as soon as ``self.audio`` is no longer None or the
    listener handle becomes falsy.

    Args:
        ready_callback: Optional callable invoked right after listening has
            been started.
    """
    # Drop any capture left over from a previous call; otherwise the wait
    # loop below would exit immediately and the stale audio would be reused.
    self.audio = None
    self._start_listening()
    if ready_callback:
        ready_callback()
    while self.listener_handle and self.audio is None:
        time.sleep(0.1)
    self.stop_listening()
45
32
46
- def start_listening (self ):
47
- if not self .listener_handle :
48
- with sr .Microphone () as source :
49
- self .recognizer .adjust_for_ambient_noise (source )
50
- self .listener_handle = self .recognizer .listen_in_background (
51
- sr .Microphone (),
52
- self .record_callback ,
53
- phrase_time_limit = self .record_timeout ,
54
- )
33
+ def _save_audio_callback (self , _recognizer , audio ):
34
+ self .audio = audio
35
+
36
+ def _start_listening (self ):
37
+ self .listener_handle = self .recognizer .listen_in_background (
38
+ self .microphone , self ._save_audio_callback
39
+ )
55
40
56
41
def stop_listening (self , wait_for_stop = False ):
57
42
if self .listener_handle :
@@ -61,40 +46,24 @@ def stop_listening(self, wait_for_stop=False):
61
46
def is_listening(self):
    """Report whether a listener handle is currently held."""
    handle = self.listener_handle
    return handle is not None
63
48
64
- def record_callback (self , _ , audio : sr .AudioData ) -> None :
65
- # Grab the raw bytes and push it into the thread safe queue.
66
- data = audio .get_raw_data ()
67
- self .data_queue .put (data )
68
-
69
49
def speech_waiting(self):
    """Report whether captured audio is currently stored on the instance."""
    captured = self.audio
    return captured is not None
92
51
93
- # Use AudioData to convert the raw data to wav data.
94
- with sr .Microphone () as source :
95
- audio_data = sr .AudioData (
96
- self .last_sample , source .SAMPLE_RATE , source .SAMPLE_WIDTH
97
- )
98
- return audio_data
52
def recognize(self, max_attempts=3, retry_delay=3):
    """Transcribe the stored audio with the Whisper API.

    Args:
        max_attempts: How many times to call the API before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The stripped transcript, or None when no audio is stored or every
        attempt raised ``sr.RequestError``.
    """
    if self.audio is None:
        return None

    # Transcribe the audio data to text using Whisper
    print("Recognizing...")
    for attempt in range(1, max_attempts + 1):
        try:
            result = self.recognizer.recognize_whisper_api(
                self.audio, api_key=self.api_key
            )
        except sr.RequestError:
            # Only pause when another attempt follows; sleeping after the
            # final failure would just delay the message below.
            if attempt < max_attempts:
                time.sleep(retry_delay)
        else:
            return result.strip()

    print("I wasn't able to understand you. Please repeat that.")
    return None
0 commit comments