@@ -88,6 +88,7 @@ class ConversationalAgentService : Service() {
8888 private val mainHandler by lazy { Handler (Looper .getMainLooper()) }
8989 private val memoryManager by lazy { MemoryManager .getInstance(this ) }
9090 private val usedMemories = mutableSetOf<String >() // Track memories already used in this conversation
91+ private var hasHeardFirstUtterance = false // Track if we've received the first user utterance
9192 private lateinit var firebaseAnalytics: FirebaseAnalytics
9293 private val eyes by lazy { Eyes (this ) }
9394
@@ -121,6 +122,7 @@ class ConversationalAgentService : Service() {
121122 clarificationAttempts = 0 // Reset clarification attempts counter
122123 sttErrorAttempts = 0 // Reset STT error attempts counter
123124 usedMemories.clear() // Clear used memories for new conversation
125+ hasHeardFirstUtterance = false // Reset first utterance flag
124126 visualFeedbackManager.showTtsWave()
125127 showInputBoxIfNeeded()
126128 visualFeedbackManager.showSpeakingOverlay() // <-- ADD THIS LINE
@@ -177,19 +179,17 @@ class ConversationalAgentService : Service() {
177179 firebaseAnalytics.logEvent(" conversation_initiated" , null )
178180 trackConversationStart()
179181
182+ // Skip greeting and start listening immediately
180183 serviceScope.launch {
181- if (conversationHistory.size == 1 ) {
182- val greeting = getPersonalizedGreeting()
183- conversationHistory = addResponse(" model" , greeting, conversationHistory)
184- trackMessage(" model" , greeting, " greeting" )
185- speakAndThenListen(greeting)
186- }
184+ Log .d(" ConvAgent" , " Starting immediate listening (no greeting)" )
185+ startImmediateListening()
187186 }
188187 return START_STICKY
189188 }
190189
191190 /* *
192191 * Gets a personalized greeting using the user's name from memories if available
192+ * NOTE: This method is kept for potential future use but no longer called on startup
193193 */
194194 private fun getPersonalizedGreeting (): String {
195195 try {
@@ -202,10 +202,95 @@ class ConversationalAgentService : Service() {
202202 }
203203 }
204204
/**
 * Starts listening for the user's speech right away, without speaking a greeting first.
 *
 * In text mode no voice listener is started; the input box is re-shown via [mainHandler]
 * and the function returns.
 *
 * First-utterance bookkeeping ([hasHeardFirstUtterance] and the one-time
 * `updateSystemPromptWithMemories()` call) is deliberately NOT done in `onResult`.
 * `processUserInput` already performs that step in order, before it appends the user
 * turn to the conversation history. Setting the flag here and launching the update in a
 * detached coroutine made that ordered path unreachable for voice input and let the
 * memory update race with the handling of the first turn.
 */
@RequiresApi(Build.VERSION_CODES.R)
private suspend fun startImmediateListening() {
    Log.d("ConvAgent", "Starting immediate listening without greeting")

    // Text mode never uses the microphone: surface the input box and stop here.
    if (isTextModeActive) {
        Log.d("ConvAgent", "In text mode, ensuring input box is visible and skipping voice listening.")
        mainHandler.post {
            showInputBoxIfNeeded() // Re-show the input box for the next turn.
        }
        return
    }

    speechCoordinator.startListening(
        onResult = { recognizedText ->
            // The user may have switched to text mode while the recognizer was running.
            if (isTextModeActive) return@startListening
            Log.d("ConvAgent", "Final user transcription: $recognizedText")
            visualFeedbackManager.updateTranscription(recognizedText)
            // Keep the final transcription on screen briefly before hiding it.
            mainHandler.postDelayed({
                visualFeedbackManager.hideTranscription()
            }, 500)

            // processUserInput owns the first-utterance flag and the memory extraction.
            processUserInput(recognizedText)
        },
        onError = { error ->
            Log.e("ConvAgent", "STT Error: $error")
            if (isTextModeActive) return@startListening // Ignore errors in text mode

            // Report the failure, including which attempt this is out of the allowed maximum.
            val sttErrorBundle = android.os.Bundle().apply {
                putString("error_message", error.take(100))
                putInt("error_attempt", sttErrorAttempts + 1)
                putInt("max_attempts", maxSttErrorAttempts)
            }
            firebaseAnalytics.logEvent("stt_error", sttErrorBundle)

            visualFeedbackManager.hideTranscription()
            sttErrorAttempts++
            serviceScope.launch {
                if (sttErrorAttempts >= maxSttErrorAttempts) {
                    // Too many consecutive recognition failures: end the conversation.
                    firebaseAnalytics.logEvent("conversation_ended_stt_errors", null)
                    val exitMessage = "I'm having trouble understanding you clearly. Please try calling later!"
                    trackMessage("model", exitMessage, "error_message")
                    gracefulShutdown(exitMessage, "stt_errors")
                } else {
                    // Ask the user to repeat and listen again.
                    val retryMessage = "I'm sorry, I didn't catch that. Could you please repeat?"
                    speakAndThenListen(retryMessage)
                }
            }
        },
        onPartialResult = { partialText ->
            // Live transcription is only shown while in voice mode.
            if (!isTextModeActive) {
                visualFeedbackManager.updateTranscription(partialText)
            }
        },
        onListeningStateChange = { listening ->
            Log.d("ConvAgent", "Listening state: $listening")
            // Show the transcription view only when listening starts in voice mode.
            if (listening && !isTextModeActive) {
                visualFeedbackManager.showTranscription()
            }
        }
    )
}
286+
205287
206288 @RequiresApi(Build .VERSION_CODES .R )
207289 private suspend fun speakAndThenListen (text : String , draw : Boolean = true) {
208- updateSystemPromptWithMemories()
290+ // Only update system prompt with memories if we've heard the first utterance
291+ if (hasHeardFirstUtterance) {
292+ updateSystemPromptWithMemories()
293+ }
209294 ttsManager.setCaptionsEnabled(draw)
210295
211296 speechCoordinator.speakText(text)
@@ -227,6 +312,21 @@ class ConversationalAgentService : Service() {
227312 mainHandler.postDelayed({
228313 visualFeedbackManager.hideTranscription()
229314 }, 500 )
315+
316+ // Mark that we've heard the first utterance and trigger memory extraction if not already done
317+ if (! hasHeardFirstUtterance) {
318+ hasHeardFirstUtterance = true
319+ Log .d(" ConvAgent" , " First utterance received, triggering memory extraction" )
320+ serviceScope.launch {
321+ try {
322+ updateSystemPromptWithMemories()
323+ } catch (e: Exception ) {
324+ Log .e(" ConvAgent" , " Error during first utterance memory extraction" , e)
325+ // Continue execution even if memory extraction fails
326+ }
327+ }
328+ }
329+
230330 processUserInput(recognizedText)
231331
232332 },
@@ -337,6 +437,18 @@ class ConversationalAgentService : Service() {
337437 serviceScope.launch {
338438 removeClarificationQuestions()
339439 updateSystemPromptWithAgentStatus()
440+
441+ // Mark that we've heard the first utterance and trigger memory extraction if not already done
442+ if (! hasHeardFirstUtterance) {
443+ hasHeardFirstUtterance = true
444+ Log .d(" ConvAgent" , " First utterance received via processUserInput, triggering memory extraction" )
445+ try {
446+ updateSystemPromptWithMemories()
447+ } catch (e: Exception ) {
448+ Log .e(" ConvAgent" , " Error during first utterance memory extraction" , e)
449+ // Continue execution even if memory extraction fails
450+ }
451+ }
340452
341453 conversationHistory = addResponse(" user" , userInput, conversationHistory)
342454
@@ -447,7 +559,7 @@ class ConversationalAgentService : Service() {
447559 // Track freemium limit reached
448560 firebaseAnalytics.logEvent(" task_rejected_freemium_limit" , null )
449561
450- val upgradeMessage = " ${getPersonalizedGreeting()} You've used all your free tasks for the month. Please upgrade in the app to unlock more. We can still talk in voice mode."
562+ val upgradeMessage = " Hey! You've used all your free tasks for the month. Please upgrade in the app to unlock more. We can still talk in voice mode."
451563 conversationHistory = addResponse(" model" , upgradeMessage, conversationHistory)
452564 trackMessage(" model" , upgradeMessage, " freemium_limit" )
453565 speakAndThenListen(upgradeMessage)
0 commit comments