Skip to content

Commit a59eff8

Browse files
committed
Implement Silence Detection
1 parent 6e55582 commit a59eff8

File tree

8 files changed

+445
-126
lines changed

8 files changed

+445
-126
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2024 squti
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package com.github.squti.androidwaverecorder
26+
27+
import android.media.AudioFormat
28+
import android.media.AudioRecord
29+
import java.io.File
30+
import java.nio.ByteBuffer
31+
import java.nio.ByteOrder
32+
33+
34+
/**
 * Asks [AudioRecord] for the minimum buffer size matching the given configuration.
 *
 * @param waveConfig supplies the sample rate, channel mask and encoding passed to
 * [AudioRecord.getMinBufferSize].
 * @return whatever [AudioRecord.getMinBufferSize] returns for those settings; the value is
 * passed through unchanged.
 */
internal fun calculateMinBufferSize(waveConfig: WaveConfig): Int =
    with(waveConfig) { AudioRecord.getMinBufferSize(sampleRate, channels, audioEncoding) }
41+
42+
/**
 * Computes the peak amplitude of a raw PCM buffer, normalised to the 16-bit range.
 *
 * The peak is the largest absolute deviation from silence, so negative excursions count
 * just like positive ones. Multi-byte samples are decoded as little-endian; trailing bytes
 * that do not form a whole sample are ignored.
 *
 * @param data raw PCM bytes.
 * @param audioFormat one of [AudioFormat.ENCODING_PCM_8BIT], [AudioFormat.ENCODING_PCM_16BIT]
 * or [AudioFormat.ENCODING_PCM_32BIT].
 * @return the peak amplitude in the range 0..32767, or 0 when [data] holds no whole sample.
 * @throws IllegalArgumentException if [audioFormat] is any other encoding.
 */
internal fun calculateAmplitude(data: ByteArray, audioFormat: Int): Int {
    val maxAmplitude = Short.MAX_VALUE.toInt()
    return when (audioFormat) {
        AudioFormat.ENCODING_PCM_8BIT -> {
            // 8-bit PCM is unsigned with silence at 128, while Kotlin bytes are signed:
            // mask to 0..255 first, then measure the distance from the midpoint.
            val peak = data.maxOfOrNull { Math.abs((it.toInt() and 0xFF) - 128) } ?: 0
            (peak * maxAmplitude / 128).coerceAtMost(maxAmplitude)
        }

        AudioFormat.ENCODING_PCM_16BIT -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
            var peak = 0
            while (samples.hasRemaining()) {
                peak = maxOf(peak, Math.abs(samples.get().toInt()))
            }
            // |Short.MIN_VALUE| is 32768, one above the reported range.
            peak.coerceAtMost(maxAmplitude)
        }

        AudioFormat.ENCODING_PCM_32BIT -> {
            val samples = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer()
            // Widen to Long so that |Int.MIN_VALUE| does not overflow.
            var peak = 0L
            while (samples.hasRemaining()) {
                peak = maxOf(peak, Math.abs(samples.get().toLong()))
            }
            // Dropping the 16 low-order bits maps the 32-bit range onto the 16-bit one.
            (peak shr 16).toInt().coerceAtMost(maxAmplitude)
        }

        else -> throw IllegalArgumentException("Unsupported audio format for encoding $audioFormat")
    }
}
66+
67+
/**
 * Computes the peak amplitude of a float PCM buffer, normalised to the 16-bit range.
 *
 * The peak is the largest absolute sample value, so negative excursions count just like
 * positive ones.
 *
 * @param data float samples; a magnitude of 1.0 is treated as full scale.
 * @return the peak amplitude in the range 0..32767, or 0 when [data] is empty.
 */
internal fun calculateAmplitude(data: FloatArray): Int {
    val peak = data.maxOfOrNull { Math.abs(it) } ?: 0f
    // Full-scale (or over-range) samples would scale to 32768 or more; clamp to the 16-bit maximum.
    return (peak * 32768).toInt().coerceAtMost(Short.MAX_VALUE.toInt())
}
71+
72+
/**
 * Estimates how many milliseconds of audio a raw PCM byte buffer represents.
 *
 * The byte count is reduced to whole frames with integer division before being divided by
 * the sample rate, so a trailing partial frame does not contribute to the result.
 *
 * @param data raw PCM bytes.
 * @param waveConfig supplies the encoding, channel mask and sample rate.
 * @return the duration in milliseconds, truncated towards zero.
 * @throws IllegalArgumentException if the configured encoding is not 8-, 16- or 32-bit PCM.
 */
internal fun calculateDurationInMillis(data: ByteArray, waveConfig: WaveConfig): Long {
    val bytesPerSample = when (waveConfig.audioEncoding) {
        AudioFormat.ENCODING_PCM_8BIT -> 1
        AudioFormat.ENCODING_PCM_16BIT -> 2
        AudioFormat.ENCODING_PCM_32BIT -> 4
        else -> throw IllegalArgumentException("Unsupported audio format for encoding ${waveConfig.audioEncoding}")
    }
    val frameCount = data.size / bytesPerSample / channelCount(waveConfig.channels)
    val seconds = frameCount / waveConfig.sampleRate.toFloat()
    return (seconds * 1000).toLong()
}
89+
90+
/**
 * Estimates how many milliseconds of audio a float PCM buffer represents.
 *
 * @param data float samples, one array element per sample.
 * @param waveConfig supplies the channel mask and sample rate.
 * @return the duration in milliseconds, truncated towards zero.
 */
internal fun calculateDurationInMillis(data: FloatArray, waveConfig: WaveConfig): Long {
    val frameCount = data.size / channelCount(waveConfig.channels)
    val seconds = frameCount / waveConfig.sampleRate.toFloat()
    return (seconds * 1000).toLong()
}
93+
94+
/**
 * Estimates the duration, in milliseconds, of the audio stored in a file.
 *
 * The whole file length is treated as sample data; nothing is subtracted for a header.
 * All arithmetic is integer, so the result is truncated towards zero.
 *
 * @param audioFile the file whose length is measured.
 * @param waveConfig supplies the encoding, channel mask and sample rate.
 * @return the estimated duration in milliseconds.
 */
internal fun calculateDurationInMillis(audioFile: File, waveConfig: WaveConfig): Long {
    val sampleWidthInBytes = bitPerSample(waveConfig.audioEncoding) / 8
    val frameCount = audioFile.length() / sampleWidthInBytes / channelCount(waveConfig.channels)
    return frameCount * 1000 / waveConfig.sampleRate
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2024 squti
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package com.github.squti.androidwaverecorder
26+
27+
import java.io.DataOutputStream
28+
import java.nio.ByteBuffer
29+
import java.nio.ByteOrder
30+
import java.util.LinkedList
31+
32+
/**
 * Writes recorded audio chunks to [outputStream], flushing any previously held-back chunks
 * first so that the stream receives data in capture order.
 */
internal class FileWriter(private val outputStream: DataOutputStream) {

    /**
     * Writes every chunk queued in [lastSkippedData], empties the queue, then writes [data].
     *
     * @param lastSkippedData chunks that were held back earlier; cleared by this call.
     * @param data the current chunk of raw bytes.
     */
    fun writeDataToStream(
        lastSkippedData: LinkedList<ByteArray>,
        data: ByteArray
    ) {
        lastSkippedData.forEach { chunk -> outputStream.write(chunk) }
        lastSkippedData.clear()
        outputStream.write(data)
    }

    /**
     * Float variant: every sample is written as a 4-byte little-endian IEEE 754 value.
     * Queued chunks in [lastSkippedData] are written first and the queue is emptied,
     * then [data] is written.
     *
     * @param lastSkippedData chunks that were held back earlier; cleared by this call.
     * @param data the current chunk of float samples.
     */
    fun writeDataToStream(
        lastSkippedData: LinkedList<FloatArray>,
        data: FloatArray
    ) {
        lastSkippedData.forEach { chunk -> outputStream.write(chunk.toLittleEndianBytes()) }
        lastSkippedData.clear()
        outputStream.write(data.toLittleEndianBytes())
    }

    /**
     * Encodes the whole array in one buffer so that each chunk costs a single allocation
     * and a single stream write instead of one of each per sample.
     */
    private fun FloatArray.toLittleEndianBytes(): ByteArray {
        val buffer = ByteBuffer.allocate(size * BYTES_PER_FLOAT).order(ByteOrder.LITTLE_ENDIAN)
        // The float view inherits the little-endian order set on the byte buffer above.
        buffer.asFloatBuffer().put(this)
        return buffer.array()
    }

    private companion object {
        const val BYTES_PER_FLOAT = 4
    }
}

android-wave-recorder/src/main/java/com/github/squti/androidwaverecorder/RecorderState.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,5 @@
2525
package com.github.squti.androidwaverecorder
2626

2727
/**
 * States a recorder can report.
 *
 * NOTE(review): [SKIPPING_SILENCE] was introduced together with silence detection; presumably
 * it is reported while silent audio is being left out of the output. Confirm against the recorder.
 */
enum class RecorderState {
    RECORDING,
    PAUSE,
    STOP,
    SKIPPING_SILENCE
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2024 squti
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package com.github.squti.androidwaverecorder
26+
27+
/**
 * Settings that control how silence is detected and handled while recording.
 *
 * @property minAmplitudeThreshold The lowest amplitude (1 to 32767) that still counts as sound;
 * anything quieter is treated as silence.
 * @property bufferDurationInMillis How much audio, in milliseconds, is buffered once silence has
 * been detected, so that the most recent stretch of silence is retained. Defaults to 2000.
 * @property preSilenceDurationInMillis How much audio, in milliseconds, leading up to the detected
 * silence is recorded. Defaults to 2000.
 */
data class SilenceDetectionConfig(
    var minAmplitudeThreshold: Int,
    var bufferDurationInMillis: Long = 2_000L,
    var preSilenceDurationInMillis: Long = 2_000L,
)

android-wave-recorder/src/main/java/com/github/squti/androidwaverecorder/WaveConfig.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,11 @@ internal fun bitPerSample(audioEncoding: Int) = when (audioEncoding) {
4343
AudioFormat.ENCODING_PCM_16BIT -> 16
4444
AudioFormat.ENCODING_PCM_32BIT -> 32
4545
AudioFormat.ENCODING_PCM_FLOAT -> 32
46-
else -> 16
46+
else -> throw IllegalArgumentException("Unsupported audio format for encoding $audioEncoding")
47+
}
48+
49+
/**
 * Translates an input channel mask into a number of channels.
 *
 * @param channels either [AudioFormat.CHANNEL_IN_MONO] or [AudioFormat.CHANNEL_IN_STEREO].
 * @return 1 for mono, 2 for stereo.
 * @throws IllegalArgumentException for any other channel mask.
 */
internal fun channelCount(channels: Int): Int {
    if (channels == AudioFormat.CHANNEL_IN_MONO) return 1
    if (channels == AudioFormat.CHANNEL_IN_STEREO) return 2
    throw IllegalArgumentException("Unsupported audio channel")
}

0 commit comments

Comments
 (0)