Skip to content

[Android] Support LLaVA and Phi-V #3195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions android/MLCChat/app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package="ai.mlc.mlcchat">

<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.READ_MEDIA_IMAGES" />
<uses-permission
android:name="android.permission.WRITE_EXTERNAL_STORAGE"
android:maxSdkVersion="32"
Expand Down
58 changes: 55 additions & 3 deletions android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ import java.util.UUID
import java.util.concurrent.Executors
import kotlin.concurrent.thread
import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessage
import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessageContent
import android.app.Activity
import kotlinx.coroutines.*
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.net.Uri
import java.io.ByteArrayOutputStream
import android.util.Base64
import android.util.Log

class AppViewModel(application: Application) : AndroidViewModel(application) {
val modelList = emptyList<ModelState>().toMutableStateList()
Expand Down Expand Up @@ -511,7 +519,9 @@ class AppViewModel(application: Application) : AndroidViewModel(application) {
private var modelPath = ""
private val executorService = Executors.newSingleThreadExecutor()
private val viewModelScope = CoroutineScope(Dispatchers.Main + Job())
private var imageUri: Uri? = null
private fun mainResetChat() {
imageUri = null
executorService.submit {
callBackend { engine.reset() }
historyMessages = mutableListOf<ChatCompletionMessage>()
Expand Down Expand Up @@ -660,16 +670,58 @@ class AppViewModel(application: Application) : AndroidViewModel(application) {
}
}

fun requestGenerate(prompt: String) {
/**
 * Remembers [uri] as the image to attach to the next generation request.
 *
 * Despite the name, no bitmap is decoded or processed here: the function only
 * stores the Uri in `imageUri` and then updates the status text.
 *
 * @param uri location of the selected image; a null value is stored as-is.
 * @throws IllegalArgumentException if `chatable()` returns false.
 */
fun requestImageBitmap(uri: Uri?) {
require(chatable())
// Enter the generating state up front; it is left again in the coroutine below.
switchToGenerating()
// The assignment is submitted to executorService rather than done inline —
// presumably so it is ordered with the other tasks queued there (TODO confirm).
executorService.submit {
imageUri = uri
// Status text and chat state are updated from a coroutine on viewModelScope.
viewModelScope.launch {
report.value = "Image process is done, ask any question."
// Only return to ready if the state is still Generating at this point.
if (modelChatState.value == ModelChatState.Generating) switchToReady()
}
}
}

/**
 * Encodes [bm] as a base64 data URL containing a JPEG of fixed size.
 *
 * The bitmap is scaled (with filtering) to a 336x336 square, so the source
 * aspect ratio is not preserved. It is then compressed as JPEG at quality 100
 * and base64-encoded without line wraps.
 *
 * @param bm source bitmap. It is never recycled by this function, so the
 *   caller may keep using it afterwards.
 * @return a string of the form `data:image/jpg;base64,<payload>`.
 */
fun bitmapToURL(bm: Bitmap): String {
    val targetSize = 336
    val scaledBitmap = Bitmap.createScaledBitmap(bm, targetSize, targetSize, true)

    val outputStream = ByteArrayOutputStream()
    scaledBitmap.compress(Bitmap.CompressFormat.JPEG, 100, outputStream)
    // createScaledBitmap hands back the source instance itself when it already
    // has the requested dimensions; recycling it would invalidate the caller's
    // bitmap. Only release the copy this function created.
    if (scaledBitmap !== bm) {
        scaledBitmap.recycle()
    }

    val imageBase64 = Base64.encodeToString(outputStream.toByteArray(), Base64.NO_WRAP)
    // NOTE(review): the registered MIME type is image/jpeg; "image/jpg" is kept
    // unchanged because the consumer of this URL is not visible here — confirm
    // what prefix it expects before changing it.
    return "data:image/jpg;base64,$imageBase64"
}

fun requestGenerate(prompt: String, activity: Activity) {
require(chatable())
switchToGenerating()
appendMessage(MessageRole.User, prompt)
appendMessage(MessageRole.Assistant, "")
var content = ChatCompletionMessageContent(text=prompt)
if (imageUri != null) {
val uri = imageUri
val bitmap = uri?.let {
activity.contentResolver.openInputStream(it)?.use { input ->
BitmapFactory.decodeStream(input)
}
}
val imageBase64URL = bitmapToURL(bitmap!!)
Log.v("requestGenerate", "image base64 url: $imageBase64URL")
val parts = listOf(
mapOf("type" to "text", "text" to prompt),
mapOf("type" to "image_url", "image_url" to imageBase64URL)
)
content = ChatCompletionMessageContent(parts=parts)
imageUri = null
}

executorService.submit {
historyMessages.add(ChatCompletionMessage(
role = OpenAIProtocol.ChatCompletionRole.user,
content = prompt
content = content
))

viewModelScope.launch {
Expand Down Expand Up @@ -768,7 +820,7 @@ enum class MessageRole {

/**
 * Pairs a remote [url] with a local [file].
 *
 * Presumably [file] is the destination the content at [url] is downloaded to —
 * confirm against the code that consumes this class.
 */
data class DownloadTask(val url: URL, val file: File)

data class MessageData(val role: MessageRole, val text: String, val id: UUID = UUID.randomUUID())
/**
 * One entry in the chat transcript.
 *
 * @property role which side of the conversation produced the message.
 * @property text the message text.
 * @property id identifier of the message; defaults to a freshly generated random UUID.
 * @property imageUri optional image attached to the message, null by default.
 *   It is declared `var`, so it can be reassigned after construction.
 */
data class MessageData(val role: MessageRole, val text: String, val id: UUID = UUID.randomUUID(), var imageUri: Uri? = null)

data class AppConfig(
@SerializedName("model_libs") var modelLibs: MutableList<String>,
Expand Down
109 changes: 88 additions & 21 deletions android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ChatView.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package ai.mlc.mlcchat

import android.app.Activity
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import androidx.compose.foundation.Image
import androidx.compose.foundation.background
import androidx.compose.foundation.gestures.detectTapGestures
import androidx.compose.foundation.layout.Arrangement
Expand All @@ -20,7 +24,9 @@ import androidx.compose.foundation.lazy.rememberLazyListState
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.foundation.text.selection.SelectionContainer
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.AddAPhoto
import androidx.compose.material.icons.filled.ArrowBack
import androidx.compose.material.icons.filled.Photo
import androidx.compose.material.icons.filled.Replay
import androidx.compose.material.icons.filled.Send
import androidx.compose.material3.Divider
Expand All @@ -43,6 +49,7 @@ import androidx.compose.runtime.saveable.rememberSaveable
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.asImageBitmap
import androidx.compose.ui.input.pointer.pointerInput
import androidx.compose.ui.platform.LocalFocusManager
import androidx.compose.ui.text.style.TextAlign
Expand All @@ -55,9 +62,10 @@ import kotlinx.coroutines.launch
@ExperimentalMaterial3Api
@Composable
fun ChatView(
navController: NavController, chatState: AppViewModel.ChatState
navController: NavController, chatState: AppViewModel.ChatState, activity: Activity
) {
val localFocusManager = LocalFocusManager.current
(activity as MainActivity).chatState = chatState
Scaffold(topBar = {
TopAppBar(
title = {
Expand All @@ -81,7 +89,9 @@ fun ChatView(
},
actions = {
IconButton(
onClick = { chatState.requestResetChat() },
onClick = {
chatState.requestResetChat()
activity.hasImage = false },
enabled = chatState.interruptable()
) {
Icon(
Expand Down Expand Up @@ -125,23 +135,23 @@ fun ChatView(
items = chatState.messages,
key = { message -> message.id },
) { message ->
MessageView(messageData = message)
MessageView(messageData = message, activity)
}
item {
// place holder item for scrolling to the bottom
}
}
Divider(thickness = 1.dp, modifier = Modifier.padding(top = 5.dp))
SendMessageView(chatState = chatState)
SendMessageView(chatState = chatState, activity)
}
}
}

@Composable
fun MessageView(messageData: MessageData) {
fun MessageView(messageData: MessageData, activity: Activity?) {
// default render the Assistant text as MarkdownText
var useMarkdown by remember { mutableStateOf(true) }

var localActivity : MainActivity = activity as MainActivity
SelectionContainer {
if (messageData.role == MessageRole.Assistant) {
Column {
Expand Down Expand Up @@ -202,19 +212,47 @@ fun MessageView(messageData: MessageData) {
horizontalArrangement = Arrangement.End,
modifier = Modifier.fillMaxWidth()
) {
Text(
text = messageData.text,
textAlign = TextAlign.Right,
color = MaterialTheme.colorScheme.onPrimaryContainer,
modifier = Modifier
.wrapContentWidth()
.background(
color = MaterialTheme.colorScheme.primaryContainer,
shape = RoundedCornerShape(5.dp)
if (messageData.imageUri != null) {
val uri = messageData.imageUri
val bitmap = uri?.let {
activity.contentResolver.openInputStream(it)?.use { input ->
BitmapFactory.decodeStream(input)
}
}
val displayBitmap = bitmap?.let { Bitmap.createScaledBitmap(it, 224, 224, true) }
if (displayBitmap != null) {
Image(
displayBitmap.asImageBitmap(),
"",
modifier = Modifier
.wrapContentWidth()
.background(
color = MaterialTheme.colorScheme.secondaryContainer,
shape = RoundedCornerShape(5.dp)
)
.padding(5.dp)
.widthIn(max = 300.dp)
)
.padding(5.dp)
.widthIn(max = 300.dp)
)
}
if (!localActivity.hasImage) {
localActivity.chatState.requestImageBitmap(messageData.imageUri)
}
localActivity.hasImage = true
} else {
Text(
text = messageData.text,
textAlign = TextAlign.Right,
color = MaterialTheme.colorScheme.onPrimaryContainer,
modifier = Modifier
.wrapContentWidth()
.background(
color = MaterialTheme.colorScheme.primaryContainer,
shape = RoundedCornerShape(5.dp)
)
.padding(5.dp)
.widthIn(max = 300.dp)
)
}

}
}
Expand All @@ -223,8 +261,9 @@ fun MessageView(messageData: MessageData) {

@ExperimentalMaterial3Api
@Composable
fun SendMessageView(chatState: AppViewModel.ChatState) {
fun SendMessageView(chatState: AppViewModel.ChatState, activity: Activity) {
val localFocusManager = LocalFocusManager.current
val localActivity : MainActivity = activity as MainActivity
Row(
horizontalArrangement = Arrangement.spacedBy(5.dp),
verticalAlignment = Alignment.CenterVertically,
Expand All @@ -241,10 +280,38 @@ fun SendMessageView(chatState: AppViewModel.ChatState) {
modifier = Modifier
.weight(9f),
)
IconButton(
onClick = {
activity.takePhoto()
},
modifier = Modifier
.aspectRatio(1f)
.weight(1f),
enabled = (chatState.chatable() && !localActivity.hasImage)
) {
Icon(
imageVector = Icons.Filled.AddAPhoto,
contentDescription = "use camera",
)
}
IconButton(
onClick = {
activity.pickImageFromGallery()
},
modifier = Modifier
.aspectRatio(1f)
.weight(1f),
enabled = (chatState.chatable() && !localActivity.hasImage)
) {
Icon(
imageVector = Icons.Filled.Photo,
contentDescription = "select image",
)
}
IconButton(
onClick = {
localFocusManager.clearFocus()
chatState.requestGenerate(text)
chatState.requestGenerate(text, activity)
text = ""
},
modifier = Modifier
Expand All @@ -271,6 +338,6 @@ fun MessageViewPreviewWithMarkdown() {
* [Link](https://example.com)
<a href="https://www.google.com/">Google</a>
"""
)
), null
)
}
Loading