Skip to content

Commit d2118b3

Browse files
[Android] Support LLaVA and Phi-V (#3195)
This PR introduces support for LLaVA and Phi-V on Android devices. It has not been thoroughly tested, but it works on my device (Android 14.0). Checkpoints: - https://huggingface.co/davidlightmysterion/llava-1.5-7b-hf-q4f16_1-MLC - https://huggingface.co/mlc-ai/Phi-3.5-vision-instruct-q4f16_1-MLC
1 parent 3fb97a7 commit d2118b3

File tree

6 files changed

+286
-32
lines changed

6 files changed

+286
-32
lines changed

android/MLCChat/app/src/main/AndroidManifest.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package="ai.mlc.mlcchat">
55

66
<uses-permission android:name="android.permission.INTERNET" />
7+
<uses-permission android:name="android.permission.READ_MEDIA_IMAGES" />
78
<uses-permission
89
android:name="android.permission.WRITE_EXTERNAL_STORAGE"
910
android:maxSdkVersion="32"

android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,15 @@ import java.util.UUID
2323
import java.util.concurrent.Executors
2424
import kotlin.concurrent.thread
2525
import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessage
26+
import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessageContent
27+
import android.app.Activity
2628
import kotlinx.coroutines.*
29+
import android.graphics.Bitmap
30+
import android.graphics.BitmapFactory
31+
import android.net.Uri
32+
import java.io.ByteArrayOutputStream
33+
import android.util.Base64
34+
import android.util.Log
2735

2836
class AppViewModel(application: Application) : AndroidViewModel(application) {
2937
val modelList = emptyList<ModelState>().toMutableStateList()
@@ -511,7 +519,9 @@ class AppViewModel(application: Application) : AndroidViewModel(application) {
511519
private var modelPath = ""
512520
private val executorService = Executors.newSingleThreadExecutor()
513521
private val viewModelScope = CoroutineScope(Dispatchers.Main + Job())
522+
private var imageUri: Uri? = null
514523
private fun mainResetChat() {
524+
imageUri = null
515525
executorService.submit {
516526
callBackend { engine.reset() }
517527
historyMessages = mutableListOf<ChatCompletionMessage>()
@@ -660,16 +670,58 @@ class AppViewModel(application: Application) : AndroidViewModel(application) {
660670
}
661671
}
662672

663-
fun requestGenerate(prompt: String) {
673+
/**
 * Remembers [uri] as the image to attach to the next generation request.
 *
 * The chat is switched to the generating state while the URI is stored on
 * [executorService]; afterwards a coroutine on [viewModelScope] updates
 * [report] and, if the chat is still generating, switches it back to ready.
 *
 * Note that no bitmap is decoded here; only the URI is recorded.
 *
 * @param uri location of the selected image, or `null`.
 * @throws IllegalArgumentException if the chat is not currently chatable.
 */
fun requestImageBitmap(uri: Uri?) {
    require(chatable())
    switchToGenerating()
    executorService.submit {
        // Written on the executor thread, in order with other submitted chat work.
        imageUri = uri
        viewModelScope.launch {
            report.value = "Image process is done, ask any question."
            val stillGenerating = modelChatState.value == ModelChatState.Generating
            if (stillGenerating) {
                switchToReady()
            }
        }
    }
}
684+
685+
/**
 * Encodes [bm] as a base64 `data:` URL holding a JPEG image.
 *
 * The bitmap is scaled (with filtering) to a fixed 336x336 square, ignoring the
 * original aspect ratio, then compressed as JPEG at quality 100 and base64-encoded
 * without line wraps.
 *
 * The caller keeps ownership of [bm]: it is never recycled by this function.
 *
 * @param bm source bitmap to encode.
 * @return a string of the form `data:image/jpg;base64,<payload>`.
 */
fun bitmapToURL(bm: Bitmap): String {
    // NOTE(review): 336 presumably matches the vision encoder's input resolution — confirm.
    val targetSize = 336
    val scaledBitmap = Bitmap.createScaledBitmap(bm, targetSize, targetSize, true)

    val outputStream = ByteArrayOutputStream()
    scaledBitmap.compress(Bitmap.CompressFormat.JPEG, 100, outputStream)
    // createScaledBitmap returns the source bitmap itself when it already has the
    // requested size; only recycle a copy created here, never the caller's bitmap.
    if (scaledBitmap !== bm) {
        scaledBitmap.recycle()
    }

    val imageBytes = outputStream.toByteArray()
    val imageBase64 = Base64.encodeToString(imageBytes, Base64.NO_WRAP)
    return "data:image/jpg;base64,$imageBase64"
}
697+
698+
fun requestGenerate(prompt: String, activity: Activity) {
664699
require(chatable())
665700
switchToGenerating()
666701
appendMessage(MessageRole.User, prompt)
667702
appendMessage(MessageRole.Assistant, "")
703+
var content = ChatCompletionMessageContent(text=prompt)
704+
if (imageUri != null) {
705+
val uri = imageUri
706+
val bitmap = uri?.let {
707+
activity.contentResolver.openInputStream(it)?.use { input ->
708+
BitmapFactory.decodeStream(input)
709+
}
710+
}
711+
val imageBase64URL = bitmapToURL(bitmap!!)
712+
Log.v("requestGenerate", "image base64 url: $imageBase64URL")
713+
val parts = listOf(
714+
mapOf("type" to "text", "text" to prompt),
715+
mapOf("type" to "image_url", "image_url" to imageBase64URL)
716+
)
717+
content = ChatCompletionMessageContent(parts=parts)
718+
imageUri = null
719+
}
668720

669721
executorService.submit {
670722
historyMessages.add(ChatCompletionMessage(
671723
role = OpenAIProtocol.ChatCompletionRole.user,
672-
content = prompt
724+
content = content
673725
))
674726

675727
viewModelScope.launch {
@@ -768,7 +820,7 @@ enum class MessageRole {
768820

769821
/**
 * Pairs a remote [url] with a local [file].
 *
 * NOTE(review): presumably [url] is the download source and [file] the destination — confirm
 * against the download code.
 */
data class DownloadTask(val url: URL, val file: File)
770822

771-
data class MessageData(val role: MessageRole, val text: String, val id: UUID = UUID.randomUUID())
823+
/**
 * A single entry in the chat transcript.
 *
 * @property role who the message belongs to.
 * @property text textual content of the message.
 * @property id unique identifier, randomly generated by default; the chat list uses it as the item key.
 * @property imageUri optional image attached to the message; when non-null, the user message view
 * shows the image instead of [text].
 */
data class MessageData(val role: MessageRole, val text: String, val id: UUID = UUID.randomUUID(), var imageUri: Uri? = null)
772824

773825
data class AppConfig(
774826
@SerializedName("model_libs") var modelLibs: MutableList<String>,

android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ChatView.kt

Lines changed: 88 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package ai.mlc.mlcchat
22

3+
import android.app.Activity
4+
import android.graphics.Bitmap
5+
import android.graphics.BitmapFactory
6+
import androidx.compose.foundation.Image
37
import androidx.compose.foundation.background
48
import androidx.compose.foundation.gestures.detectTapGestures
59
import androidx.compose.foundation.layout.Arrangement
@@ -20,7 +24,9 @@ import androidx.compose.foundation.lazy.rememberLazyListState
2024
import androidx.compose.foundation.shape.RoundedCornerShape
2125
import androidx.compose.foundation.text.selection.SelectionContainer
2226
import androidx.compose.material.icons.Icons
27+
import androidx.compose.material.icons.filled.AddAPhoto
2328
import androidx.compose.material.icons.filled.ArrowBack
29+
import androidx.compose.material.icons.filled.Photo
2430
import androidx.compose.material.icons.filled.Replay
2531
import androidx.compose.material.icons.filled.Send
2632
import androidx.compose.material3.Divider
@@ -43,6 +49,7 @@ import androidx.compose.runtime.saveable.rememberSaveable
4349
import androidx.compose.runtime.setValue
4450
import androidx.compose.ui.Alignment
4551
import androidx.compose.ui.Modifier
52+
import androidx.compose.ui.graphics.asImageBitmap
4653
import androidx.compose.ui.input.pointer.pointerInput
4754
import androidx.compose.ui.platform.LocalFocusManager
4855
import androidx.compose.ui.text.style.TextAlign
@@ -55,9 +62,10 @@ import kotlinx.coroutines.launch
5562
@ExperimentalMaterial3Api
5663
@Composable
5764
fun ChatView(
58-
navController: NavController, chatState: AppViewModel.ChatState
65+
navController: NavController, chatState: AppViewModel.ChatState, activity: Activity
5966
) {
6067
val localFocusManager = LocalFocusManager.current
68+
(activity as MainActivity).chatState = chatState
6169
Scaffold(topBar = {
6270
TopAppBar(
6371
title = {
@@ -81,7 +89,9 @@ fun ChatView(
8189
},
8290
actions = {
8391
IconButton(
84-
onClick = { chatState.requestResetChat() },
92+
onClick = {
93+
chatState.requestResetChat()
94+
activity.hasImage = false },
8595
enabled = chatState.interruptable()
8696
) {
8797
Icon(
@@ -125,23 +135,23 @@ fun ChatView(
125135
items = chatState.messages,
126136
key = { message -> message.id },
127137
) { message ->
128-
MessageView(messageData = message)
138+
MessageView(messageData = message, activity)
129139
}
130140
item {
131141
// place holder item for scrolling to the bottom
132142
}
133143
}
134144
Divider(thickness = 1.dp, modifier = Modifier.padding(top = 5.dp))
135-
SendMessageView(chatState = chatState)
145+
SendMessageView(chatState = chatState, activity)
136146
}
137147
}
138148
}
139149

140150
@Composable
141-
fun MessageView(messageData: MessageData) {
151+
fun MessageView(messageData: MessageData, activity: Activity?) {
142152
// default render the Assistant text as MarkdownText
143153
var useMarkdown by remember { mutableStateOf(true) }
144-
154+
var localActivity : MainActivity = activity as MainActivity
145155
SelectionContainer {
146156
if (messageData.role == MessageRole.Assistant) {
147157
Column {
@@ -202,19 +212,47 @@ fun MessageView(messageData: MessageData) {
202212
horizontalArrangement = Arrangement.End,
203213
modifier = Modifier.fillMaxWidth()
204214
) {
205-
Text(
206-
text = messageData.text,
207-
textAlign = TextAlign.Right,
208-
color = MaterialTheme.colorScheme.onPrimaryContainer,
209-
modifier = Modifier
210-
.wrapContentWidth()
211-
.background(
212-
color = MaterialTheme.colorScheme.primaryContainer,
213-
shape = RoundedCornerShape(5.dp)
215+
if (messageData.imageUri != null) {
216+
val uri = messageData.imageUri
217+
val bitmap = uri?.let {
218+
activity.contentResolver.openInputStream(it)?.use { input ->
219+
BitmapFactory.decodeStream(input)
220+
}
221+
}
222+
val displayBitmap = bitmap?.let { Bitmap.createScaledBitmap(it, 224, 224, true) }
223+
if (displayBitmap != null) {
224+
Image(
225+
displayBitmap.asImageBitmap(),
226+
"",
227+
modifier = Modifier
228+
.wrapContentWidth()
229+
.background(
230+
color = MaterialTheme.colorScheme.secondaryContainer,
231+
shape = RoundedCornerShape(5.dp)
232+
)
233+
.padding(5.dp)
234+
.widthIn(max = 300.dp)
214235
)
215-
.padding(5.dp)
216-
.widthIn(max = 300.dp)
217-
)
236+
}
237+
if (!localActivity.hasImage) {
238+
localActivity.chatState.requestImageBitmap(messageData.imageUri)
239+
}
240+
localActivity.hasImage = true
241+
} else {
242+
Text(
243+
text = messageData.text,
244+
textAlign = TextAlign.Right,
245+
color = MaterialTheme.colorScheme.onPrimaryContainer,
246+
modifier = Modifier
247+
.wrapContentWidth()
248+
.background(
249+
color = MaterialTheme.colorScheme.primaryContainer,
250+
shape = RoundedCornerShape(5.dp)
251+
)
252+
.padding(5.dp)
253+
.widthIn(max = 300.dp)
254+
)
255+
}
218256

219257
}
220258
}
@@ -223,8 +261,9 @@ fun MessageView(messageData: MessageData) {
223261

224262
@ExperimentalMaterial3Api
225263
@Composable
226-
fun SendMessageView(chatState: AppViewModel.ChatState) {
264+
fun SendMessageView(chatState: AppViewModel.ChatState, activity: Activity) {
227265
val localFocusManager = LocalFocusManager.current
266+
val localActivity : MainActivity = activity as MainActivity
228267
Row(
229268
horizontalArrangement = Arrangement.spacedBy(5.dp),
230269
verticalAlignment = Alignment.CenterVertically,
@@ -241,10 +280,38 @@ fun SendMessageView(chatState: AppViewModel.ChatState) {
241280
modifier = Modifier
242281
.weight(9f),
243282
)
283+
IconButton(
284+
onClick = {
285+
activity.takePhoto()
286+
},
287+
modifier = Modifier
288+
.aspectRatio(1f)
289+
.weight(1f),
290+
enabled = (chatState.chatable() && !localActivity.hasImage)
291+
) {
292+
Icon(
293+
imageVector = Icons.Filled.AddAPhoto,
294+
contentDescription = "use camera",
295+
)
296+
}
297+
IconButton(
298+
onClick = {
299+
activity.pickImageFromGallery()
300+
},
301+
modifier = Modifier
302+
.aspectRatio(1f)
303+
.weight(1f),
304+
enabled = (chatState.chatable() && !localActivity.hasImage)
305+
) {
306+
Icon(
307+
imageVector = Icons.Filled.Photo,
308+
contentDescription = "select image",
309+
)
310+
}
244311
IconButton(
245312
onClick = {
246313
localFocusManager.clearFocus()
247-
chatState.requestGenerate(text)
314+
chatState.requestGenerate(text, activity)
248315
text = ""
249316
},
250317
modifier = Modifier
@@ -271,6 +338,6 @@ fun MessageViewPreviewWithMarkdown() {
271338
* [Link](https://example.com)
272339
<a href="https://www.google.com/">Google</a>
273340
"""
274-
)
341+
), null
275342
)
276343
}

0 commit comments

Comments
 (0)