Skip to content

Commit 4f0d755

Browse files
Merge pull request #214 from Ayush0Chaudhary/minor-fixes
⏏️ Implemented the long press element, and fixed the scroll AAAHH! :rage4:
2 parents 30ed29c + 11aed89 commit 4f0d755

File tree

7 files changed

+105
-20
lines changed

7 files changed

+105
-20
lines changed

app/src/main/java/com/blurr/voice/ScreenInteractionService.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,30 @@ class ScreenInteractionService : AccessibilityService() {
562562

563563
dispatchGesture(gesture, null, null)
564564
}
565+
/**
566+
* Performs a long press gesture at a specific point on the screen.
567+
* @param x The x-coordinate of the long press.
568+
* @param y The y-coordinate of the long press.
569+
*/
570+
fun longClickOnPoint(x: Float, y: Float) {
571+
// Show visual feedback for the tap if the debug flag is enabled
572+
if (DEBUG_SHOW_TAPS) {
573+
showDebugTap(x, y)
574+
}
575+
576+
val path = Path().apply {
577+
moveTo(x, y)
578+
}
579+
// A long press is essentially a tap that is held down.
580+
// The stroke below is held for 2000 ms, comfortably longer than a typical long-press timeout (roughly 500 ms).
581+
val longPressStroke = GestureDescription.StrokeDescription(path, 0, 2000L)
582+
583+
val gesture = GestureDescription.Builder()
584+
.addStroke(longPressStroke)
585+
.build()
586+
587+
dispatchGesture(gesture, null, null)
588+
}
565589

566590
/**
567591
* Scrolls the screen down by a given number of pixels with more precision.

app/src/main/java/com/blurr/voice/api/Finger.kt

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,15 @@ class Finger(private val context: Context) {
9393
service?.clickOnPoint(x.toFloat(), y.toFloat())
9494
}
9595

96+
/**
97+
* Performs a long press (press and hold) at a specific point on the screen.
98+
*/
99+
fun longPress(x: Int, y: Int) {
100+
Log.d(TAG, "Long pressing at ($x, $y)")
101+
// Delegates to ScreenInteractionService.longClickOnPoint; the safe call makes this a no-op when the service is null.
102+
service?.longClickOnPoint(x.toFloat(), y.toFloat())
103+
}
104+
96105
/**
97106
* Swipes between two points on the screen.
98107
*/
@@ -150,7 +159,7 @@ class Finger(private val context: Context) {
150159
* @param pixels The number of pixels to scroll.
151160
* @param duration The duration of the swipe in milliseconds.
152161
*/
153-
fun scrollDown(pixels: Int, duration: Int = 500) {
162+
fun scrollUp(pixels: Int, duration: Int = 500) {
154163
val displayMetrics = context.resources.displayMetrics
155164
val screenWidth = displayMetrics.widthPixels
156165
val screenHeight = displayMetrics.heightPixels
@@ -173,7 +182,7 @@ class Finger(private val context: Context) {
173182
* @param pixels The number of pixels to scroll.
174183
* @param duration The duration of the swipe in milliseconds.
175184
*/
176-
fun scrollUp(pixels: Int, duration: Int = 500) {
185+
fun scrollDown(pixels: Int, duration: Int = 500) {
177186
val displayMetrics = context.resources.displayMetrics
178187
val screenWidth = displayMetrics.widthPixels
179188
val screenHeight = displayMetrics.heightPixels

app/src/main/java/com/blurr/voice/utilities/STTManager.kt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@ class STTManager(private val context: Context) {
2222
private var onPartialResultCallback: ((String) -> Unit)? = null
2323
private var isInitialized = false
2424
private val visualizerManager = STTVisualizer(context)
25+
private val COMPLETE_SILENCE_MS = 2500 // time of silence to consider input complete
26+
private val POSSIBLE_SILENCE_MS = 2000 // shorter silence hint window
27+
private val MIN_UTTERANCE_MS = 1500 // enforce a minimum listening duration
2528

2629

27-
// Remove initialization from constructor - will be done lazily on main thread
28-
30+
2931
private fun initializeSpeechRecognizer() {
3032
if (isInitialized) return
3133

@@ -58,12 +60,10 @@ class STTManager(private val context: Context) {
5860
}
5961

6062
override fun onRmsChanged(rmsdB: Float) {
61-
// --- NEW: Invoke the callback with the new audio level ---
6263
visualizerManager.onRmsChanged(rmsdB)
6364
}
6465

6566
override fun onBufferReceived(buffer: ByteArray?) {
66-
// Optional: Can be used for real-time processing
6767
}
6868

6969
override fun onEndOfSpeech() {
@@ -113,7 +113,6 @@ class STTManager(private val context: Context) {
113113
}
114114

115115
override fun onPartialResults(partialResults: Bundle?) {
116-
// v-- IMPLEMENT THIS METHOD --v
117116
val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
118117
if (!matches.isNullOrEmpty()) {
119118
val partialText = matches[0]
@@ -123,7 +122,6 @@ class STTManager(private val context: Context) {
123122
}
124123

125124
override fun onEvent(eventType: Int, params: Bundle?) {
126-
// Optional: Handle specific events
127125
}
128126
}
129127
}
@@ -144,7 +142,6 @@ class STTManager(private val context: Context) {
144142
this.onListeningStateChange = onListeningStateChange
145143
this.onPartialResultCallback = onPartialResult
146144

147-
// Initialize on main thread if needed
148145
CoroutineScope(Dispatchers.Main).launch {
149146
initializeSpeechRecognizer()
150147

@@ -160,6 +157,9 @@ class STTManager(private val context: Context) {
160157
putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault())
161158
putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
162159
putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
160+
// putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, COMPLETE_SILENCE_MS)
161+
// putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, POSSIBLE_SILENCE_MS)
162+
// putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, MIN_UTTERANCE_MS)
163163
}
164164

165165
try {

app/src/main/java/com/blurr/voice/v2/Agent.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import com.blurr.voice.v2.llm.GeminiApi
1010
import com.blurr.voice.v2.llm.GeminiMessage
1111
import com.blurr.voice.v2.message_manager.MemoryManager
1212
import com.blurr.voice.v2.perception.Perception
13+
import com.blurr.voice.utilities.SpeechCoordinator
1314
import kotlinx.coroutines.delay
1415

1516
/**
@@ -37,6 +38,9 @@ class Agent(
3738
// The agent's internal state, which is updated at each step.
3839
val state: AgentState = AgentState()
3940
private val TAG = "AgentV2"
41+
42+
// Speech coordinator for voice notifications
43+
private val speechCoordinator = SpeechCoordinator.getInstance(context)
4044

4145
// A complete, long-term record of the entire session.
4246
// We use <Unit> because we haven't defined a custom structured output for the 'done' action yet.
@@ -83,6 +87,7 @@ class Agent(
8387
memoryManager.addContextMessage(GeminiMessage(text = "System Note: Your previous output was not valid JSON. Please ensure your response is correctly formatted."))
8488
if (state.consecutiveFailures >= settings.maxFailures) {
8589
Log.d(TAG,"❌ Agent failed too many times consecutively. Stopping.")
90+
speechCoordinator.speakToUser("Agent failed after multiple attempts. Stopping execution.")
8691
break
8792
}
8893
delay(1000) // Wait a moment before retrying
@@ -122,6 +127,7 @@ class Agent(
122127
// --- Check for Task Completion ---
123128
if (actionResults.any { it.isDone == true }) {
124129
Log.d(TAG,"✅ Agent finished the task.")
130+
speechCoordinator.speakToUser("Task completed successfully.")
125131
state.stopped = true
126132
}
127133

@@ -131,8 +137,8 @@ class Agent(
131137

132138
// --- Loop Finished ---
133139
if (state.nSteps > maxSteps) {
134-
135140
Log.d(TAG,"--- 🏁 Agent reached max steps. Stopping. ---")
141+
speechCoordinator.speakToUser("Agent reached maximum steps limit. Stopping execution.")
136142
} else {
137143
Log.d(TAG,"--- 🏁 Agent run finished. ---")
138144
}

app/src/main/java/com/blurr/voice/v2/AgentService.kt

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package com.blurr.voice.v2
33
import android.app.Notification
44
import android.app.NotificationChannel
55
import android.app.NotificationManager
6+
import android.app.PendingIntent
67
import android.app.Service
78
import android.content.Context
89
import android.content.Intent
@@ -84,8 +85,8 @@ class AgentService : Service() {
8485
//
8586
//
8687
companion object {
87-
private const val NOTIFICATION_CHANNEL_ID = "AgentServiceChannel"
88-
private const val NOTIFICATION_ID = 1
88+
private const val NOTIFICATION_CHANNEL_ID = "AgentServiceChannelV2"
89+
private const val NOTIFICATION_ID = 14
8990
private const val EXTRA_TASK = "com.blurr.voice.v2.EXTRA_TASK"
9091
private const val ACTION_STOP_SERVICE = "com.blurr.voice.v2.ACTION_STOP_SERVICE"
9192

@@ -243,9 +244,26 @@ class AgentService : Service() {
243244
* Creates the persistent notification for the foreground service.
244245
*/
245246
private fun createNotification(contentText: String): Notification {
247+
// Create PendingIntent for the stop action
248+
val stopIntent = Intent(this, AgentService::class.java).apply {
249+
action = ACTION_STOP_SERVICE
250+
}
251+
val stopPendingIntent = PendingIntent.getService(
252+
this,
253+
0,
254+
stopIntent,
255+
PendingIntent.FLAG_UPDATE_CURRENT or PendingIntent.FLAG_IMMUTABLE
256+
)
257+
246258
return NotificationCompat.Builder(this, NOTIFICATION_CHANNEL_ID)
247259
.setContentTitle("AI Agent Active")
248260
.setContentText(contentText)
261+
.addAction(
262+
android.R.drawable.ic_media_pause, // Using built-in pause icon as stop button
263+
"Stop Agent",
264+
stopPendingIntent
265+
)
266+
.setOngoing(true) // Makes notification persistent and harder to dismiss
249267
// .setSmallIcon(R.drawable.ic_agent_notification) // TODO: Add a notification icon
250268
.build()
251269
}

app/src/main/java/com/blurr/voice/v2/actions/Action.kt

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ data class ParamSpec(val name: String, val type: KClass<*>, val description: Str
2222
sealed class Action {
2323
// Each action is a data class (if it has args) or an object (if it doesn't).
2424
// Note: Property names here follow Kotlin's camelCase convention.
25+
data class LongPressElement(val elementId: Int) : Action()
2526
data class TapElement(val elementId: Int) : Action()
2627
data object SwitchApp : Action()
2728
data object Back : Action()
@@ -129,16 +130,22 @@ sealed class Action {
129130
params = listOf(ParamSpec("app_name", String::class, "The name of the app.")),
130131
build = { args -> OpenApp(args["app_name"] as String) }
131132
),
132-
"scroll_down" to Spec(
133-
name = "scroll_down",
134-
description = "Scroll down by the specified amount of pixels.",
135-
params = listOf(ParamSpec("amount", Int::class, "Amount of pixels to scroll down.")),
133+
"swipe_down" to Spec(
134+
name = "swipe_down",
135+
description = "swipe down by the specified amount of pixels.",
136+
params = listOf(ParamSpec("amount", Int::class, "Amount of pixels to swipe down.")),
136137
build = { args -> ScrollDown(args["amount"] as Int) }
137138
),
138-
"scroll_up" to Spec(
139-
name = "scroll_up",
140-
description = "Scroll up by the specified amount of pixels.",
141-
params = listOf(ParamSpec("amount", Int::class, "Amount of pixels to scroll up.")),
139+
"long_press_element" to Spec(
140+
name = "long_press_element",
141+
description = "Press and hold the element with the specified numeric ID. Useful for context menus, selecting text, etc.",
142+
params = listOf(ParamSpec("element_id", Int::class, "The numeric ID of the element to long press.")),
143+
build = { args -> LongPressElement(args["element_id"] as Int) }
144+
),
145+
"swipe_up" to Spec(
146+
name = "swipe_up",
147+
description = "swipe up by the specified amount of pixels.",
148+
params = listOf(ParamSpec("amount", Int::class, "Amount of pixels to swipe up.")),
142149
build = { args -> ScrollUp(args["amount"] as Int) }
143150
),
144151
"search_google" to Spec(

app/src/main/java/com/blurr/voice/v2/actions/ActionExecutor.kt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,27 @@ class ActionExecutor(private val finger: Finger) {
117117
includeExtractedContentOnlyOnce = true
118118
)
119119
}
120+
is Action.LongPressElement -> {
121+
val elementNode = screenAnalysis.elementMap[action.elementId]
122+
if (elementNode != null) {
123+
val bounds = elementNode.attributes["bounds"]
124+
val text = elementNode.getVisibleText().replace("\n", " ")
125+
val resourceId = elementNode.attributes["resource-id"] ?: ""
126+
val extraInfo = elementNode.extraInfo
127+
val className = (elementNode.attributes["class"] ?: "").removePrefix("android.")
128+
129+
if (bounds != null) {
130+
val (centerX, centerY) = getCenterFromBounds(bounds)
131+
// Long-press the center point computed from the element's bounds.
132+
finger.longPress(centerX, centerY)
133+
ActionResult(longTermMemory = "Long-pressed element text:$text <$resourceId> <$extraInfo> <$className>")
134+
} else {
135+
ActionResult(error = "Element with ID ${action.elementId} has no bounds information.")
136+
}
137+
} else {
138+
ActionResult(error = "Element with ID ${action.elementId} not found in the current screen state.")
139+
}
140+
}
120141
is Action.OpenApp -> {
121142
val packageName = findPackageNameFromAppName(action.appName, context)
122143
if (packageName != null) {

0 commit comments

Comments
 (0)