-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Open
Labels
PredictionsRelated to Predictions categoryRelated to Predictions categoryReact NativeReact Native related issueReact Native related issueService TeamIssues asked to the Service TeamIssues asked to the Service Teamfeature-requestRequest a new featureRequest a new feature
Description
Describe the bug
When I send my microphone buffer to Transcribe using Predictions.convert, I only get empty strings back.
I'm not sure whether this should be a bug report or feature request. I'm guessing my audio buffer is formatted incorrectly for Predictions.convert, but the docs don't give enough information to verify that. This may be related to this open issue: #4163
To Reproduce
Steps to reproduce the behavior:
- Follow the Amplify React-Native tutorial to this point: https://docs.amplify.aws/start/getting-started/nextsteps/q/integration/react-native
- Import this module to read the microphone stream: https://github.com/chadsmith/react-native-microphone-stream (the only one I've found that works with React Native)
- Convert the buffer using the pcmEncode function here: https://github.com/aws-samples/amazon-transcribe-websocket-static/blob/master/lib/audioUtils.js
- Send buffer using Predictions.convert as described here: https://docs.amplify.aws/lib/predictions/transcribe/q/platform/js#working-with-the-api
- Build app on Android phone (tested on Pixel 2). Verify that the app has microphone permissions. Press Start and talk into the microphone.
Expected behavior
Expected a transcription of the spoken text to return -- instead got only empty strings back.
Code Snippet
My App.js is here:
import React, {useState} from 'react';
import {
View,
Text,
StyleSheet,
TextInput,
Button,
TouchableOpacity,
} from 'react-native';
import Amplify from 'aws-amplify';
import Predictions, {
AmazonAIPredictionsProvider,
} from '@aws-amplify/predictions';
import awsconfig from './aws-exports';
// import LiveAudioStream from 'react-native-live-audio-stream';
// import AudioRecord from 'react-native-audio-record';
import MicStream from 'react-native-microphone-stream';
// from https://github.com/aws-samples/amazon-transcribe-websocket-static/tree/master/lib
// AWS SDK helpers for the Transcribe streaming event protocol.
// NOTE(review): eventStreamMarshaller is constructed here but never used in
// this file (the marshalling calls below are commented out) — confirm whether
// it can be removed.
const util_utf8_node = require('@aws-sdk/util-utf8-node'); // utilities for encoding and decoding UTF8
const marshaller = require('@aws-sdk/eventstream-marshaller'); // for converting binary event stream messages to and from JSON
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(
util_utf8_node.toUtf8,
util_utf8_node.fromUtf8,
);
// Wire Amplify to the generated backend config and enable the Predictions
// category (required before Predictions.convert can be called).
Amplify.configure(awsconfig);
Amplify.addPluggable(new AmazonAIPredictionsProvider());
// NOTE(review): initialState is not referenced anywhere in this file —
// possibly left over from the Amplify getting-started tutorial.
const initialState = {name: '', description: ''};
// React Native has no Node Buffer global; polyfill it for libraries that
// expect one (e.g. the eventstream marshaller).
global.Buffer = global.Buffer || require('buffer').Buffer;
function VoiceCapture() {
const [text, setText] = useState('');
// from https://github.com/aws-samples/amazon-transcribe-websocket-static/tree/master/lib
function pcmEncode(input) {
var offset = 0;
var buffer = new ArrayBuffer(input.length * 2);
var view = new DataView(buffer);
for (var i = 0; i < input.length; i++, offset += 2) {
var s = Math.max(-1, Math.min(1, input[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return buffer;
}
async function transcribe(bytes) {
await Predictions.convert({
transcription: {
source: {
bytes,
},
language: 'en-US',
},
})
.then(({transcription: {fullText}}) => console.log({fullText}))
.catch((err) => console.log({err}));
}
var listener = MicStream.addListener((data) => {
// console.log(data);
// encode the mic input
let pcmEncodedBuffer = pcmEncode(data);
// // add the right JSON headers and structure to the message
// let audioEventMessage = getAudioEventMessage(
// global.Buffer.from(pcmEncodedBuffer),
// );
// //convert the JSON object + headers into a binary event stream message
// let binary = eventStreamMarshaller.marshall(audioEventMessage);
// the docs say this takes a PCM Audio byte buffer, so i assume the wrappers above aren't necessary. Tried them anyways with no luck.
// (https://docs.amplify.aws/lib/predictions/transcribe/q/platform/js#set-up-the-backend)
transcribe(pcmEncodedBuffer);
});
function startTranscribing() {
MicStream.init({
bufferSize: 4096 * 32, // tried multiplying this buffer size to send longer - still no luck
// sampleRate: 44100,
sampleRate: 16000,
bitsPerChannel: 16,
channelsPerFrame: 1,
});
MicStream.start();
console.log('Started mic stream');
}
function stopTranscribing() {
MicStream.stop();
listener.remove();
}
return (
<View style={styles.container}>
<View style={styles.horizontalView}>
<TouchableOpacity
style={styles.mediumButton}
onPress={() => {
// Voice.start('en_US');
// transcribeAudio();
startTranscribing();
}}>
<Text style={styles.mediumButtonText}>START</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.mediumButton}
onPress={() => {
stopTranscribing();
}}>
<Text style={styles.mediumButtonText}>STOP</Text>
</TouchableOpacity>
</View>
<TextInput
style={styles.editableText}
multiline
onChangeText={(editedText) => setText(editedText)}>
{text}
</TextInput>
</View>
);
}
const App = () => {
return (
<View style={styles.container}>
<VoiceCapture />
</View>
);
};
// Shared color palette used by the StyleSheet below.
export const colors = {
primary: '#0049bd', // button background
white: '#ffffff', // button label text
};
// Shared spacing scale (density-independent pixels).
export const padding = {
sm: 8,
md: 16,
lg: 24,
xl: 32,
};
// Component styles. NOTE(review): bodyText and smallBodyText are not
// referenced anywhere in this file — possibly unused.
const styles = StyleSheet.create({
// Full-screen wrapper for the app and the VoiceCapture screen.
container: {
flex: 1,
backgroundColor: 'white',
},
bodyText: {
fontSize: 16,
height: 20,
fontWeight: 'normal',
fontStyle: 'normal',
},
// Label inside the START/STOP buttons.
mediumButtonText: {
fontSize: 16,
height: 20,
fontWeight: 'normal',
fontStyle: 'normal',
color: colors.white,
},
smallBodyText: {
fontSize: 14,
height: 18,
fontWeight: 'normal',
fontStyle: 'normal',
},
// START/STOP button container (raised, rounded rectangle).
mediumButton: {
alignItems: 'center',
justifyContent: 'center',
width: 132,
height: 48,
padding: padding.md,
margin: 14,
backgroundColor: colors.primary,
fontSize: 20,
fontStyle: 'normal',
elevation: 1, // Android shadow
shadowOffset: {width: 1, height: 1}, // iOS shadow
shadowOpacity: 0.2,
shadowRadius: 2,
borderRadius: 2,
},
// Multiline transcription text box.
editableText: {
textAlign: 'left',
textAlignVertical: 'top', // Android: start text at the top of the box
borderColor: 'black',
borderWidth: 2,
padding: padding.md,
margin: 14,
flex: 5,
fontSize: 16,
height: 20,
},
// Row holding the START/STOP buttons side by side.
horizontalView: {
flex: 1,
flexDirection: 'row',
alignItems: 'stretch',
justifyContent: 'center',
},
});
export default App;
ashirkhan94
Metadata
Metadata
Assignees
Labels
PredictionsRelated to Predictions categoryRelated to Predictions categoryReact NativeReact Native related issueReact Native related issueService TeamIssues asked to the Service TeamIssues asked to the Service Teamfeature-requestRequest a new featureRequest a new feature