You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
Hello
I want to do "Automatic Speech Recognition with Speaker Diarization" based on this tutorial link, but I get an error on this line:
word_hyp, word_ts_hyp = asr_decoder_ts.run_ASR(asr_model)
The error is: TypeError: _get_batch_preds() takes 1 positional argument but 2 were given
Here is the traceback from my run:
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /home/a/PycharmProjects/Speaker_identification/speaker_identify.py:110 │
│ in │
│ │
│ 107 from nemo.collections.asr.parts.utils.decoder_timestamps_utils import │
│ 108 asr_decoder_ts = ASRDecoderTimeStamps(cfg.diarizer) │
│ 109 asr_model = asr_decoder_ts.set_asr_model() │
│ ❱ 110 asr_decoder_ts.run_ASR(asr_model) │
│ 111 │
│ 112 print("Decoded word output dictionary: \n", word_hyp['an4_diarize_test │
│ 113 print("Word-level timestamps dictionary: \n", word_ts_hyp['an4_diarize │
│ │
│ /home/a/NeMo/nemo/collections/asr/parts/utils/decoder_timestamps_utils. │
│ py:656 in run_ASR_BPE_CTC │
│ │
│ 653 │ │ │ │ logging.info(f"[{idx+1}/{len(self.audio_file_list)}] F │
│ 654 │ │ │ │ frame_asr.clear_buffer() │
│ 655 │ │ │ │ │
│ ❱ 656 │ │ │ │ hyp, greedy_predictions_list, log_prob = get_wer_feat_ │
│ 657 │ │ │ │ │ audio_file_path, │
│ 658 │ │ │ │ │ frame_asr, │
│ 659 │ │ │ │ │ self.chunk_len_in_sec, │
│ │
│ /home/a/NeMo/nemo/collections/asr/parts/utils/decoder_timestamps_utils. │
│ py:231 in get_wer_feat_logit │
│ │
│ 228 │ """ │
│ 229 │ asr.reset() │
│ 230 │ asr.read_audio_file_and_return(audio_file_path, delay, model_strid │
│ ❱ 231 │ hyp, tokens, log_prob = asr.transcribe_with_ts(tokens_per_chunk, d │
│ 232 │ return hyp, tokens, log_prob │
│ 233 │
│ 234 │
│ │
│ /home/a/NeMo/nemo/collections/asr/parts/utils/decoder_timestamps_utils. │
│ py:284 in transcribe_with_ts │
│ │
│ 281 │ def transcribe_with_ts( │
│ 282 │ │ self, tokens_per_chunk: int, delay: int, │
│ 283 │ ): │
│ ❱ 284 │ │ self.infer_logits() │
│ 285 │ │ self.unmerged = [] │
│ 286 │ │ self.part_logprobs = [] │
│ 287 │ │ for idx, pred in enumerate(self.all_preds): │
│ │
│ /home/a/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py:1 │
│ 15 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
│ │
│ /home/a/NeMo/nemo/collections/asr/parts/utils/streaming_utils.py:762 in │
│ infer_logits │
│ │
│ 759 │ │ while len(frame_buffers) > 0: │
│ 760 │ │ │ self.frame_buffers += frame_buffers[:] │
│ 761 │ │ │ self.data_layer.set_signal(frame_buffers[:]) │
│ ❱ 762 │ │ │ self._get_batch_preds(keep_logits) │
│ 763 │ │ │ frame_buffers = self.frame_bufferer.get_buffers_batch() │
│ 764 │ │
│ 765 │ @torch.no_grad() │
│ │
│ /home/a/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py:1 │
│ 15 in decorate_context │
│ │
│ 112 │ @functools.wraps(func) │
│ 113 │ def decorate_context(*args, **kwargs): │
│ 114 │ │ with ctx_factory(): │
│ ❱ 115 │ │ │ return func(*args, **kwargs) │
│ 116 │ │
│ 117 │ return decorate_context │
│ 118 │
╰──────────────────────────────────────────────────────────────────────────────╯
How can I fix it?
Beta Was this translation helpful? Give feedback.
All reactions