Skip to content

Support homophone replacer in Android asr demo. #2210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
import kotlin.concurrent.thread

private const val TAG = "sherpa-onnx"
Expand Down Expand Up @@ -199,8 +202,22 @@ class MainActivity : AppCompatActivity() {
var ruleFsts : String?
ruleFsts = null

val useHr = false
val hr = HomophoneReplacerConfig(
// Used only when useHr is true
// Please download the following 3 files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
//
// dict and lexicon.txt can be shared by different apps
//
// replace.fst is specific to each app
dictDir = "dict",
lexicon = "lexicon.txt",
ruleFsts = "replace.fst",
)

Log.i(TAG, "Select model type $type")
val config = OnlineRecognizerConfig(
var config = OnlineRecognizerConfig(
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
modelConfig = getModelConfig(type = type)!!,
// lmConfig = getOnlineLMConfig(type = type),
Expand All @@ -212,9 +229,66 @@ class MainActivity : AppCompatActivity() {
config.ruleFsts = ruleFsts
}

if (useHr) {
if (hr.dictDir.isNotEmpty() && hr.dictDir.first() != '/') {
// We need to copy it from the assets directory to some path
val newDir = copyDataDir(hr.dictDir)
hr.dictDir = "$newDir/${hr.dictDir}"
}
config.hr = hr
}

recognizer = OnlineRecognizer(
assetManager = application.assets,
config = config,
)
}
/**
 * Copies the asset tree rooted at [dataDir] out of the APK and reports where it landed.
 *
 * @param dataDir asset path (relative to the assets root) to copy
 * @return absolute path of the app's external files directory, i.e. the
 *         parent under which [dataDir] was recreated
 */
private fun copyDataDir(dataDir: String): String {
    Log.i(TAG, "data dir is $dataDir")
    copyAssets(dataDir)

    // The destination root is resolved after the copy, exactly as before;
    // `!!` still throws if external storage is unavailable.
    return application.getExternalFilesDir(null)!!.absolutePath.also { targetRoot ->
        Log.i(TAG, "newDataDir: $targetRoot")
    }
}

/**
 * Recursively copies the asset at [path] into the app's external files
 * directory, preserving the relative layout.
 *
 * An entry whose listing is empty is handed to [copyFile]; otherwise a
 * matching directory is created and every child is copied in turn.
 * I/O failures are logged and swallowed (best effort).
 *
 * @param path asset path relative to the assets root; "" means the root
 */
private fun copyAssets(path: String) {
    try {
        // list() is nullable. The previous `assets!!` raised a
        // NullPointerException that the IOException handler below does not
        // catch, so a null listing is now logged instead.
        val assets = application.assets.list(path)
        if (assets == null) {
            Log.e(TAG, "Failed to copy $path. Could not list assets")
            return
        }

        if (assets.isEmpty()) {
            copyFile(path)
            return
        }

        val fullPath = "${application.getExternalFilesDir(null)}/$path"
        File(fullPath).mkdirs()

        // The prefix does not change per child, so compute it once.
        val prefix = if (path.isEmpty()) "" else "$path/"
        for (asset in assets) {
            copyAssets(prefix + asset)
        }
    } catch (ex: IOException) {
        Log.e(TAG, "Failed to copy $path. $ex")
    }
}

/**
 * Copies a single asset file into the app's external files directory under
 * the same relative path.
 *
 * Any failure is logged and swallowed (best effort), so callers are never
 * interrupted by a failed copy.
 *
 * @param filename asset path relative to the assets root
 */
private fun copyFile(filename: String) {
    try {
        // Without this check a missing external files dir would silently
        // turn into the literal path "null/<filename>".
        val outDir = application.getExternalFilesDir(null)
        if (outDir == null) {
            Log.e(TAG, "Failed to copy $filename, external files dir is unavailable")
            return
        }

        // Open the asset first so a missing asset never creates an empty
        // destination file. `use` closes both streams even if the copy
        // throws part-way through; previously they leaked on error.
        application.assets.open(filename).use { istream ->
            FileOutputStream(File(outDir, filename)).use { ostream ->
                istream.copyTo(ostream)
            }
        }
    } catch (ex: Exception) {
        Log.e(TAG, "Failed to copy $filename, $ex")
    }
}
}
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/homophone-replacer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ HomophoneReplacer::HomophoneReplacer(Manager *mgr,
HomophoneReplacer::~HomophoneReplacer() = default;

// Delegates to the implementation object's Apply() and passes the result
// through RemoveInvalidUtf8Sequences() before returning it.
//
// The earlier bare `return impl_->Apply(text);` is dropped: it made the
// sanitizing return below it unreachable.
// NOTE(review): presumably RemoveInvalidUtf8Sequences strips bytes that do not
// form valid UTF-8 so the string is safe to hand across JNI — confirm.
std::string HomophoneReplacer::Apply(const std::string &text) const {
  return RemoveInvalidUtf8Sequences(impl_->Apply(text));
}

#if __ANDROID_API__ >= 9
Expand Down
Loading